diff --git a/pkgdown.yml b/pkgdown.yml
index 0c14636..d004882 100644
--- a/pkgdown.yml
+++ b/pkgdown.yml
@@ -4,7 +4,7 @@ pkgdown_sha: ~
 articles:
   case-study: case-study.html
   using-reclanc: using-reclanc.html
-last_built: 2024-07-22T14:51Z
+last_built: 2024-07-22T18:41Z
 urls:
   reference: https://kaiaragaki.github.io/reclanc/reference
   article: https://kaiaragaki.github.io/reclanc/articles
diff --git a/reference/predict.clanc.html b/reference/predict.clanc.html
index b6379f9..38bf951 100644
--- a/reference/predict.clanc.html
+++ b/reference/predict.clanc.html
@@ -70,6 +70,11 @@ <h2 id="arguments">Arguments<a class="anchor" aria-label="anchor" href="#argumen
 assay.</p></dd>
 
 
+<dt id="arg-format">format<a class="anchor" aria-label="anchor" href="#arg-format"></a></dt>
+<dd><p>Character. Are the data "wide" (default), with genes as
+columns, or "tall", with genes as rows?</p></dd>
+
+
 <dt id="arg--">...<a class="anchor" aria-label="anchor" href="#arg--"></a></dt>
 <dd><p>Not used, but required for extensibility.</p></dd>
 
@@ -77,11 +82,6 @@ <h2 id="arguments">Arguments<a class="anchor" aria-label="anchor" href="#argumen
 <dt id="arg-method">method<a class="anchor" aria-label="anchor" href="#arg-method"></a></dt>
 <dd><p>If <code>type</code> is <code>numeric</code>, the method of correlation</p></dd>
 
-
-<dt id="arg-format-">format.<a class="anchor" aria-label="anchor" href="#arg-format-"></a></dt>
-<dd><p>Character. Are the data "wide" (default), with genes as
-columns, or "tall", with genes as rows?</p></dd>
-
 </dl></div>
     <div class="section level2">
     <h2 id="value">Value<a class="anchor" aria-label="anchor" href="#value"></a></h2>
diff --git a/search.json b/search.json
index 57ccba4..9ef5db0 100644
--- a/search.json
+++ b/search.json
@@ -1 +1 @@
-[{"path":"https://kaiaragaki.github.io/reclanc/CONTRIBUTING.html","id":null,"dir":"","previous_headings":"","what":"Contributing to reclanc","title":"Contributing to reclanc","text":"outlines propose change reclanc.","code":""},{"path":"https://kaiaragaki.github.io/reclanc/CONTRIBUTING.html","id":"fixing-typos","dir":"","previous_headings":"","what":"Fixing typos","title":"Contributing to reclanc","text":"can fix typos, spelling mistakes, grammatical errors documentation directly using GitHub web interface, long changes made source file. generally means ’ll need edit roxygen2 comments .R, .Rd file. can find .R file generates .Rd reading comment first line.","code":""},{"path":"https://kaiaragaki.github.io/reclanc/CONTRIBUTING.html","id":"bigger-changes","dir":"","previous_headings":"","what":"Bigger changes","title":"Contributing to reclanc","text":"want make bigger change, ’s good idea first file issue make sure someone team agrees ’s needed. ’ve found bug, please file issue illustrates bug minimal reprex (also help write unit test, needed). See guide create great issue advice.","code":""},{"path":"https://kaiaragaki.github.io/reclanc/CONTRIBUTING.html","id":"pull-request-process","dir":"","previous_headings":"Bigger changes","what":"Pull request process","title":"Contributing to reclanc","text":"Fork package clone onto computer. haven’t done , recommend using usethis::create_from_github(\"KaiAragaki/reclanc\", fork = TRUE). Install development dependencies devtools::install_dev_deps(), make sure package passes R CMD check running devtools::check(). R CMD check doesn’t pass cleanly, ’s good idea ask help continuing. Create Git branch pull request (PR). recommend using usethis::pr_init(\"brief-description--change\"). Make changes, commit git, create PR running usethis::pr_push(), following prompts browser. title PR briefly describe change. body PR contain Fixes #issue-number. user-facing changes, add bullet top NEWS.md (.e. just first header). Follow style described https://style.tidyverse.org/news.html.","code":""},{"path":"https://kaiaragaki.github.io/reclanc/CONTRIBUTING.html","id":"code-style","dir":"","previous_headings":"Bigger changes","what":"Code style","title":"Contributing to reclanc","text":"New code follow tidyverse style guide. can use styler package apply styles, please don’t restyle code nothing PR. use roxygen2, Markdown syntax, documentation. use testthat unit tests. Contributions test cases included easier accept.","code":""},{"path":"https://kaiaragaki.github.io/reclanc/CONTRIBUTING.html","id":"code-of-conduct","dir":"","previous_headings":"","what":"Code of Conduct","title":"Contributing to reclanc","text":"Please note reclanc project released Contributor Code Conduct. contributing project agree abide terms.","code":""},{"path":"https://kaiaragaki.github.io/reclanc/LICENSE.html","id":null,"dir":"","previous_headings":"","what":"MIT License","title":"MIT License","text":"Copyright (c) 2024 reclanc authors Permission hereby granted, free charge, person obtaining copy software associated documentation files (“Software”), deal Software without restriction, including without limitation rights use, copy, modify, merge, publish, distribute, sublicense, /sell copies Software, permit persons Software furnished , subject following conditions: copyright notice permission notice shall included copies substantial portions Software. SOFTWARE PROVIDED “”, WITHOUT WARRANTY KIND, EXPRESS IMPLIED, INCLUDING LIMITED WARRANTIES MERCHANTABILITY, FITNESS PARTICULAR PURPOSE NONINFRINGEMENT. EVENT SHALL AUTHORS COPYRIGHT HOLDERS LIABLE CLAIM, DAMAGES LIABILITY, WHETHER ACTION CONTRACT, TORT OTHERWISE, ARISING , CONNECTION SOFTWARE USE DEALINGS SOFTWARE.","code":""},{"path":"https://kaiaragaki.github.io/reclanc/articles/case-study.html","id":"introduction","dir":"Articles","previous_headings":"","what":"Introduction","title":"case-study","text":"Let’s consider relatively full-featured, practical use case reclanc. vignette, ’ll go basics fitting models, well leverage tidymodels elaborate things like resampling tuning hyperparameters. ’ll fit final model, use predict subtypes entirely new dataset. vignette tries assume little knowledge machine learning tidymodels.","code":""},{"path":[]},{"path":"https://kaiaragaki.github.io/reclanc/articles/case-study.html","id":"a-simple-fit","dir":"Articles","previous_headings":"Fitting","what":"A simple fit","title":"case-study","text":"Let’s start fitting procedure. first need gene expression data. data ’m using Sjödahl et al. (2012). contains RNA expression 308 bladder cancer tumors. paper, Sjödahl et al. used transcriptional data classify tumors seven molecular subtypes (MS): ’d like apply subtype framework datasets. , first need generate centroids. can begin, though, need convert outcomes factors. case, outcomes molecular subtypes: simplest form, since clanc accepts ExpressionSet objects, following done : problem method, though, idea good fit . active argument specifies number genes used distinguishing features given class. case, class find 5 genes expression patterns peculiar given molecular subtype, subtype 7 (total number subtypes) x 5 (number active genes) = 35 genes (see blog post - better yet - original paper details). gotten better fit genes? selecting genes need? know?","code":"lund <- s3readRDS(\"lund.rds\", \"reclanc-lund\", region = \"us-east-2\") lund #> ExpressionSet (storageMode: lockedEnvironment) #> assayData: 16940 features, 308 samples  #>   element names: exprs  #> protocolData: none #> phenoData #>   sampleNames: UC_0001_1 UC_0002_1 ... UC_0785_1 (308 total) #>   varLabels: title source ... sample (16 total) #>   varMetadata: labelDescription #> featureData: none #> experimentData: use 'experimentData(object)' #> Annotation: table(lund$molecular_subtype) #>  #>    MS1a    MS1b  MS2a.1  MS2a.2  MS2b.1 MS2b2.1 MS2b2.2  #>      53      78      30      55      43      20      29 lund$molecular_subtype <- factor(lund$molecular_subtype) simple_centroids <- clanc(lund, classes = \"molecular_subtype\", active = 5) head(simple_centroids$centroids) #>   class    gene expression pooled_sd active     prior #> 1  MS1a   CXCL1   6.534490 0.8749133      5 0.1428571 #> 2  MS1a     MMD   7.922508 0.6429620      5 0.1428571 #> 3  MS1a C9orf19   8.378910 0.7510552      5 0.1428571 #> 4  MS1a    BNC1   5.297095 0.2106762      5 0.1428571 #> 5  MS1a  SLFN11   7.362887 0.6824663      5 0.1428571 #> 6  MS1a    CRAT   6.004517 0.3425669      5 0.1428571"},{"path":"https://kaiaragaki.github.io/reclanc/articles/case-study.html","id":"setting-the-stage-for-more-elaborate-analyses","dir":"Articles","previous_headings":"Fitting","what":"Setting the stage for more elaborate analyses","title":"case-study","text":"can get started tackling larger questions, let’s take brief detour land tidymodels. tidymodels collection packages make running tuning algorithms like much less painful much standardized. order leverage tidymodels, need buy-data structures. (Aside: don’t mean make buy-sound begrudging. say need, really mean : ’re going specifying long formulas, reason R really, really hates. Emil Hvitfeldt recently (time writing) allowed tidymodels handle long formulas gracefully, using tidymodels infrastructure gift, chore.) Many tidymodels workflows begin model specification. rationale behind separate model specification step model fitting step (whereas base R, generally happen ). reclanc makes easy specify model adding custom engine parsnip::discrim_linear, specifying model looks like : mod doesn’t anything - ’s kind point: specifies model later fit , doesn’t fitting . allows us reuse specification across code. next step wrangle data bit ‘wide’ format, columns outcomes (classes) predictors (genes), rows observations (samples): Finally, specify formula fitting model. uses recipes package tidymodels. delightful package can help preprocess data, ’s scope vignette. Instead, just think way specify formula keeps R blowing : can bundle model specification (mod) preprocessing steps (recipe, just formula) workflow: Now can fit model: ’ll notice results saw previously, demonstrating ’re using tidymodels rather base R, ’re still thing.","code":"library(tidymodels) mod <- discrim_linear() |>   set_engine(     engine = \"clanc\", # Note: \"clanc\", not \"reclanc\"     active = 5   ) wrangled <- data.frame(class = lund$molecular_subtype, t(exprs(lund))) head(wrangled[1:5]) #>            class LOC23117   FCGR2B    TRIM44 C15orf39 #> UC_0001_1   MS1b 5.565262 5.306654  9.305053 6.430063 #> UC_0002_1 MS2b.1 5.505854 5.731128  9.242790 7.265748 #> UC_0003_1 MS2a.2 5.336140 5.540470  9.888668 7.244976 #> UC_0006_2 MS2b.1 5.576748 5.847743  9.408895 7.377358 #> UC_0007_1 MS2a.2 5.414919 5.510507 10.482469 6.435552 #> UC_0008_1 MS2b.1 5.279174 5.633093  9.112754 7.057977 # Note that the recipe requires 'template data' recipe <- recipe(class ~ ., wrangled) wf <- workflow() |>   add_recipe(recipe) |>   add_model(mod) wf #> ══ Workflow ════════════════════════════════════════════════════════════════════ #> Preprocessor: Recipe #> Model: discrim_linear() #>  #> ── Preprocessor ──────────────────────────────────────────────────────────────── #> 0 Recipe Steps #>  #> ── Model ─────────────────────────────────────────────────────────────────────── #> Linear Discriminant Model Specification (classification) #>  #> Engine-Specific Arguments: #>   active = 5 #>  #> Computational engine: clanc tidymodels_fit <- fit(wf, data = wrangled) head(extract_fit_parsnip(tidymodels_fit)$fit$centroids) #>   class    gene expression pooled_sd active     prior #> 1  MS1a   CXCL1   6.534490 0.8749133      5 0.1428571 #> 2  MS1a     MMD   7.922508 0.6429620      5 0.1428571 #> 3  MS1a C9orf19   8.378910 0.7510552      5 0.1428571 #> 4  MS1a    BNC1   5.297095 0.2106762      5 0.1428571 #> 5  MS1a  SLFN11   7.362887 0.6824663      5 0.1428571 #> 6  MS1a    CRAT   6.004517 0.3425669      5 0.1428571"},{"path":"https://kaiaragaki.github.io/reclanc/articles/case-study.html","id":"measuring-fit-accuracy-with-cross-validation","dir":"Articles","previous_headings":"Fitting","what":"Measuring fit accuracy with cross-validation","title":"case-study","text":"Now ’ve dialed tidymodels framework, can lot elaborate things ease. One concerns whether 5 active genes good choice (active = 5). somewhat simple way determine good choice 5 genes use cross-validation. Cross-validation allows us test good fit training model , say, 80% data, testing rest (see Wikipedia diagram k-fold cross validation). allows us get measure good fit , without break actual test data - general used ’re ready finalize model. Speaking test data, let’s go ahead split now. ’ll lock test data away use ’ve fit final model. , ’ll use cross validation assess good fit , essentially using training data testing data. course, tidymodels makes easy , using rsample::initial_split: train test just subsets original data, containing 80% 20% original data (respectively). also tries maintain relative proportions classes within datasets (set strata = class): Creating folds cross validation nearly initial_split: can reuse workflow wf, contains model formula. difference use fit_resamples, specify metric want use measure good fit (remember every fold chunk data uses test fit). simplicity, let’s use accuracy: can extract accuracy metrics using collect_metrics, roots around fits helpfully extracts metrics, aggregates , calculated standard error: model accuracy 74%. Applying model testing data: Note testing data accuracy (%) approximates training data accuracy (74%).","code":"set.seed(123) splits <- initial_split(wrangled, prop = 0.8, strata = class) train <- training(splits) test <- testing(splits) round(prop.table(table(train$class)), 2) #>  #>    MS1a    MS1b  MS2a.1  MS2a.2  MS2b.1 MS2b2.1 MS2b2.2  #>    0.17    0.25    0.10    0.18    0.15    0.07    0.08 round(prop.table(table(test$class)), 2) #>  #>    MS1a    MS1b  MS2a.1  MS2a.2  MS2b.1 MS2b2.1 MS2b2.2  #>    0.19    0.27    0.08    0.16    0.11    0.05    0.16 folds <- vfold_cv(train, v = 5, strata = class) folds #> #  5-fold cross-validation using stratification  #> # A tibble: 5 × 2 #>   splits           id    #>   <list>           <chr> #> 1 <split [193/51]> Fold1 #> 2 <split [193/51]> Fold2 #> 3 <split [195/49]> Fold3 #> 4 <split [197/47]> Fold4 #> 5 <split [198/46]> Fold5 fits <- fit_resamples(   wf,   folds,   metrics = metric_set(accuracy) ) #> 35/35 (100%) genes in centroids found in data #> 35/35 (100%) genes in centroids found in data #> 35/35 (100%) genes in centroids found in data #> 35/35 (100%) genes in centroids found in data #> 35/35 (100%) genes in centroids found in data fits #> # Resampling results #> # 5-fold cross-validation using stratification  #> # A tibble: 5 × 4 #>   splits           id    .metrics         .notes           #>   <list>           <chr> <list>           <list>           #> 1 <split [193/51]> Fold1 <tibble [1 × 4]> <tibble [0 × 3]> #> 2 <split [193/51]> Fold2 <tibble [1 × 4]> <tibble [0 × 3]> #> 3 <split [195/49]> Fold3 <tibble [1 × 4]> <tibble [0 × 3]> #> 4 <split [197/47]> Fold4 <tibble [1 × 4]> <tibble [0 × 3]> #> 5 <split [198/46]> Fold5 <tibble [1 × 4]> <tibble [0 × 3]> metrics <- collect_metrics(fits) metrics #> # A tibble: 1 × 6 #>   .metric  .estimator  mean     n std_err .config              #>   <chr>    <chr>      <dbl> <int>   <dbl> <chr>                #> 1 accuracy multiclass 0.737     5  0.0289 Preprocessor1_Model1 # Fit a model using *all* of our training data final_fit <- clanc(class ~ ., train, active = 5)  # Use it to predict the (known) classes of our test data preds <- predict(final_fit, new_data = test, type = \"class\") #> 35/35 (100%) genes in centroids found in data w_preds <- cbind(preds, test) # Compare known class vs predicted class metric <- accuracy(w_preds, class, .pred_class) metric #> # A tibble: 1 × 3 #>   .metric  .estimator .estimate #>   <chr>    <chr>          <dbl> #> 1 accuracy multiclass     0.734"},{"path":"https://kaiaragaki.github.io/reclanc/articles/case-study.html","id":"tuning-hyperparameters-with-tune","dir":"Articles","previous_headings":"Fitting","what":"Tuning hyperparameters with tune","title":"case-study","text":"Now least measure good model fits, better genes? get away fewer? Running command different numbers drag - fortunately, ’s yet another beautiful package help us: tune. use tune, need re-specify model let tune know parameters want tune: update previous workflow using update_model, let’s just declare new one: specify range values active try: can fit folds using spread values chose: , can collect metrics - time, however, summary metrics values active: graphically:  looks like read maximal accuracy around 21 genes - let’s choose 20 genes nice round number: looks like accuracy little better now ’ve chosen optimal number active genes.","code":"tune_mod <- discrim_linear() |>   set_engine(     engine = \"clanc\",     active = tune()   ) tune_wf <- workflow() |>   add_recipe(recipe) |>   add_model(tune_mod) values <- data.frame(active = seq(from = 1, to = 50, by = 4)) values #>    active #> 1       1 #> 2       5 #> 3       9 #> 4      13 #> 5      17 #> 6      21 #> 7      25 #> 8      29 #> 9      33 #> 10     37 #> 11     41 #> 12     45 #> 13     49 # This is going to take some time, since we're fitting 5 folds 13 times each. tuned <- tune_grid(   tune_wf,   folds,   metrics = metric_set(accuracy),   grid = values ) tuned #> # Tuning results #> # 5-fold cross-validation using stratification  #> # A tibble: 5 × 4 #>   splits           id    .metrics          .notes           #>   <list>           <chr> <list>            <list>           #> 1 <split [193/51]> Fold1 <tibble [13 × 5]> <tibble [0 × 3]> #> 2 <split [193/51]> Fold2 <tibble [13 × 5]> <tibble [0 × 3]> #> 3 <split [195/49]> Fold3 <tibble [13 × 5]> <tibble [0 × 3]> #> 4 <split [197/47]> Fold4 <tibble [13 × 5]> <tibble [0 × 3]> #> 5 <split [198/46]> Fold5 <tibble [13 × 5]> <tibble [0 × 3]> tuned_metrics <- collect_metrics(tuned) tuned_metrics #> # A tibble: 13 × 7 #>    active .metric  .estimator  mean     n std_err .config               #>     <dbl> <chr>    <chr>      <dbl> <int>   <dbl> <chr>                 #>  1      1 accuracy multiclass 0.585     5  0.0368 Preprocessor1_Model01 #>  2      5 accuracy multiclass 0.737     5  0.0289 Preprocessor1_Model02 #>  3      9 accuracy multiclass 0.748     5  0.0496 Preprocessor1_Model03 #>  4     13 accuracy multiclass 0.781     5  0.0403 Preprocessor1_Model04 #>  5     17 accuracy multiclass 0.770     5  0.0280 Preprocessor1_Model05 #>  6     21 accuracy multiclass 0.774     5  0.0335 Preprocessor1_Model06 #>  7     25 accuracy multiclass 0.785     5  0.0378 Preprocessor1_Model07 #>  8     29 accuracy multiclass 0.794     5  0.0319 Preprocessor1_Model08 #>  9     33 accuracy multiclass 0.773     5  0.0281 Preprocessor1_Model09 #> 10     37 accuracy multiclass 0.790     5  0.0295 Preprocessor1_Model10 #> 11     41 accuracy multiclass 0.794     5  0.0339 Preprocessor1_Model11 #> 12     45 accuracy multiclass 0.815     5  0.0267 Preprocessor1_Model12 #> 13     49 accuracy multiclass 0.815     5  0.0277 Preprocessor1_Model13 ggplot(tuned_metrics, aes(active, mean)) +   geom_line() +   coord_cartesian(ylim = c(0, 1)) +   labs(x = \"Number Active Genes\", y = \"Accuracy\") final_fit_tuned <- clanc(class ~ ., data = train, active = 20) # Use it to predict the (known) classes of our test data: preds <- predict(final_fit_tuned, new_data = test, type = \"class\") #> 140/140 (100%) genes in centroids found in data w_preds <- cbind(preds, test) # Compare known class vs predicted class: metric <- accuracy(w_preds, class, .pred_class) metric #> # A tibble: 1 × 3 #>   .metric  .estimator .estimate #>   <chr>    <chr>          <dbl> #> 1 accuracy multiclass     0.812"},{"path":"https://kaiaragaki.github.io/reclanc/articles/case-study.html","id":"predicting","dir":"Articles","previous_headings":"","what":"Predicting","title":"case-study","text":"Now want apply classifier new data. second dataset RNAseq data 30 bladder cancer cell lines: Predicting incredibly simple. Since ’re using different sequencing method (RNAseq vs array-based sequencing), probably makes sense use correlation based classification rather original distance-based metric used original ClaNC package. can specifying type = \"numeric\" whatever correlation method prefer.  Sjödahl paper, seven subtypes simplified five subtypes merging two similar biological pathways activated. ease interpretation, can :","code":"library(cellebrate) cell_rna #> class: DESeqDataSet  #> dim: 18548 30  #> metadata(1): version #> assays(2): counts rlog_norm_counts #> rownames(18548): TSPAN6 TNMD ... MT-ND5 MT-ND6 #> rowData names(0): #> colnames(30): 1A6 253JP ... UC7 UC9 #> colData names(5): cell bsl lum call clade cell_preds <- predict(   final_fit_tuned,   cell_rna,   assay = 2,   type = \"numeric\",   method = \"spearman\" ) #> 118/140 (84%) genes in centroids found in data  out <- cbind(colData(cell_rna), cell_preds) |>   as_tibble()  out #> # A tibble: 30 × 12 #>    cell     bsl    lum call  clade            .pred_MS1a .pred_MS1b .pred_MS2a.1 #>    <chr>  <dbl>  <dbl> <chr> <fct>                 <dbl>      <dbl>        <dbl> #>  1 1A6     99.0   1.02 BSL   Epithelial Other     0.0600      0.224        0.149 #>  2 253JP   76.6  23.4  BSL   Unknown              0.0574      0.240        0.219 #>  3 5637    98.5   1.46 BSL   Epithelial Other     0.0958      0.243        0.160 #>  4 BV      49.9  50.1  LUM   Unknown              0.0758      0.262        0.238 #>  5 HT1197  56.0  44.0  BSL   Epithelial Other     0.119       0.288        0.224 #>  6 HT1376  10.9  89.1  LUM   Epithelial Other     0.100       0.277        0.238 #>  7 J82     98.1   1.91 BSL   Mesenchymal          0.127       0.292        0.219 #>  8 RT112    0   100    LUM   Luminal Papilla…     0.173       0.380        0.294 #>  9 RT4      0   100    LUM   Luminal Papilla…     0.134       0.317        0.257 #> 10 RT4V6    0   100    LUM   Luminal Papilla…     0.143       0.207        0.165 #> # ℹ 20 more rows #> # ℹ 4 more variables: .pred_MS2a.2 <dbl>, .pred_MS2b.1 <dbl>, #> #   .pred_MS2b2.1 <dbl>, .pred_MS2b2.2 <dbl> plotting_data <- out |>   pivot_longer(cols = starts_with(\".pred\"))  plotting_data |>   ggplot(aes(cell, value, color = name)) +   geom_point() +   facet_grid(~clade, scales = \"free_x\", space = \"free_x\") table <- plotting_data |>   summarize(winner = name[which.max(value)], .by = c(cell, clade)) |>   mutate(     five = case_when(       winner %in% c(\".pred_MS1a\", \".pred_MS1b\") ~ \"Urobasal A\",       winner %in% c(\".pred_MS2a.1\", \".pred_MS2a.2\") ~ \"Genomically unstable\",       winner == \".pred_MS2b.1\" ~ \"Infiltrated\",       winner == \".pred_MS2b2.1\" ~ \"Uro-B\",       winner == \".pred_MS2b2.2\" ~ \"SCC-like\"     )   ) |>   relocate(cell, five, clade)  print(table, n = 30) #> # A tibble: 30 × 4 #>    cell   five                 clade             winner        #>    <chr>  <chr>                <fct>             <chr>         #>  1 1A6    SCC-like             Epithelial Other  .pred_MS2b2.2 #>  2 253JP  SCC-like             Unknown           .pred_MS2b2.2 #>  3 5637   SCC-like             Epithelial Other  .pred_MS2b2.2 #>  4 BV     Urobasal A           Unknown           .pred_MS1b    #>  5 HT1197 SCC-like             Epithelial Other  .pred_MS2b2.2 #>  6 HT1376 SCC-like             Epithelial Other  .pred_MS2b2.2 #>  7 J82    Urobasal A           Mesenchymal       .pred_MS1b    #>  8 RT112  Urobasal A           Luminal Papillary .pred_MS1b    #>  9 RT4    Urobasal A           Luminal Papillary .pred_MS1b    #> 10 RT4V6  Urobasal A           Luminal Papillary .pred_MS1b    #> 11 SCaBER SCC-like             Epithelial Other  .pred_MS2b2.2 #> 12 SW780  Urobasal A           Luminal Papillary .pred_MS1b    #> 13 T24    SCC-like             Mesenchymal       .pred_MS2b2.2 #> 14 TCCSup SCC-like             Mesenchymal       .pred_MS2b2.2 #> 15 UC10   SCC-like             Epithelial Other  .pred_MS2b2.2 #> 16 UC11   SCC-like             Mesenchymal       .pred_MS2b2.2 #> 17 UC12   Urobasal A           Mesenchymal       .pred_MS1b    #> 18 UC13   SCC-like             Mesenchymal       .pred_MS2b2.2 #> 19 UC14   Urobasal A           Luminal Papillary .pred_MS1b    #> 20 UC15   SCC-like             Epithelial Other  .pred_MS2b2.2 #> 21 UC16   SCC-like             Epithelial Other  .pred_MS2b2.2 #> 22 UC17   SCC-like             Luminal Papillary .pred_MS2b2.2 #> 23 UC18   SCC-like             Mesenchymal       .pred_MS2b2.2 #> 24 UC1    Urobasal A           Luminal Papillary .pred_MS1b    #> 25 UC3    SCC-like             Mesenchymal       .pred_MS2b2.2 #> 26 UC4    Urobasal A           Unknown           .pred_MS1b    #> 27 UC5    Urobasal A           Luminal Papillary .pred_MS1b    #> 28 UC6    Urobasal A           Luminal Papillary .pred_MS1b    #> 29 UC7    Urobasal A           Epithelial Other  .pred_MS1b    #> 30 UC9    Genomically unstable Epithelial Other  .pred_MS2a.1"},{"path":"https://kaiaragaki.github.io/reclanc/articles/using-reclanc.html","id":"introduction","dir":"Articles","previous_headings":"","what":"Introduction","title":"using-reclanc","text":"vignette provide brief introduction basic usage reclanc. ’re interested reclanc works, ’d recommend reading blog post wrote original paper Alan Dabney, created original ClaNC algorithm.","code":""},{"path":"https://kaiaragaki.github.io/reclanc/articles/using-reclanc.html","id":"fitting","dir":"Articles","previous_headings":"","what":"Fitting","title":"using-reclanc","text":"create new centroids existing expression data, use clanc function. reclanc provides synthetic expression data can work : data include 12 samples, 6 class “” 6 class “B”, 100 genes. reclanc able ingest variety common formats expression data. Objects like SummarizedExperiments ExpressionSets frequently used bioinformatic analyses arrange data samples columns genes rows. conflict expected formula input base R, predictors (genes) outcomes (classes) columns. reclanc eases friction expecting input common format, abstracting away wrangling aspect analysis. , two broad categories input - ‘wide data’ ‘tall data’.","code":"library(reclanc) library(SummarizedExperiment) library(parsnip) lapply(synthetic_expression, head) #> $expression #>        sample1  sample2  sample3  sample4  sample5  sample6  sample7  sample8 #> gene1 8.097529 7.119188 7.304400 7.554689 7.953206 7.714925 7.512700 8.597547 #> gene2 8.641837 9.400416 8.500865 8.878687 8.318438 8.728683 7.812591 7.638167 #> gene3 3.436236 4.317915 3.435193 3.515755 3.024976 4.762209 5.048956 2.006646 #> gene4 4.368008 5.212750 4.618249 4.201365 3.195294 4.707750 5.126769 6.178658 #> gene5 2.423974 3.563816 4.062362 2.163278 2.021435 2.813873 0.000000 4.652358 #> gene6 5.371205 5.919809 4.366915 4.805534 4.834856 5.622157 3.883531 3.593082 #>        sample9 sample10 sample11 sample12 #> gene1 6.475641 7.648858 8.637526 7.345038 #> gene2 8.110285 7.906104 7.424728 7.927039 #> gene3 2.739211 3.111668 3.161077 4.306611 #> gene4 5.170265 4.259578 5.872855 6.159023 #> gene5 1.532242 3.399823 3.691250 1.932937 #> gene6 4.246205 4.637316 3.575837 2.730452 #>  #> $classes #> [1] A A A A A A #> Levels: A B"},{"path":"https://kaiaragaki.github.io/reclanc/articles/using-reclanc.html","id":"wide-inputs","dir":"Articles","previous_headings":"Fitting","what":"Wide inputs","title":"using-reclanc","text":"Wide inputs require data predictors outcomes columns, together, single data.frame.","code":""},{"path":"https://kaiaragaki.github.io/reclanc/articles/using-reclanc.html","id":"formula","dir":"Articles","previous_headings":"Fitting > Wide inputs","what":"Formula","title":"using-reclanc","text":"","code":"form_data <- cbind(   class = synthetic_expression$classes,   as.data.frame(t(synthetic_expression$expression)) ) head(form_data[1:5]) #>         class    gene1    gene2    gene3    gene4 #> sample1     A 8.097529 8.641837 3.436236 4.368008 #> sample2     A 7.119188 9.400416 4.317915 5.212750 #> sample3     A 7.304400 8.500865 3.435193 4.618249 #> sample4     A 7.554689 8.878687 3.515755 4.201365 #> sample5     A 7.953206 8.318438 3.024976 3.195294 #> sample6     A 7.714925 8.728683 4.762209 4.707750 clanc(class ~ ., form_data, active = 5) #> <clanc>  #> $centroids #>    class   gene expression pooled_sd active prior #> 1      A gene12   7.514718 0.4779155      5   0.5 #> 2      A  gene2   8.744821 0.3147537      5   0.5 #> 3      A gene13   8.936462 0.3418472      5   0.5 #> 4      A gene21   6.584681 0.5279636      5   0.5 #> 5      A gene24   4.307301 0.7214700      5   0.5 #> 6      A gene74   4.028507 0.4940783      5   0.5 #> 7      A gene41   4.328516 0.6317005      5   0.5 #> 8      A gene95   6.873184 0.4462475      5   0.5 #> 9      A gene52   3.743798 0.5173769      5   0.5 #> 10     A gene66   7.008174 0.5883218      5   0.5 #> 11     B gene12   8.072284 0.4779155      5   0.5 #> 12     B gene13   9.938137 0.3418472      5   0.5 #> 13     B  gene2   8.273987 0.3147537      5   0.5 #> 14     B gene24   3.370467 0.7214700      5   0.5 #> 15     B gene21   5.789423 0.5279636      5   0.5 #> 16     B gene41   5.518354 0.6317005      5   0.5 #> 17     B gene74   3.226598 0.4940783      5   0.5 #> 18     B gene52   2.438579 0.5173769      5   0.5 #> 19     B gene95   6.288173 0.4462475      5   0.5 #> 20     B gene66   7.891588 0.5883218      5   0.5"},{"path":"https://kaiaragaki.github.io/reclanc/articles/using-reclanc.html","id":"recipe","dir":"Articles","previous_headings":"Fitting > Wide inputs","what":"recipe","title":"using-reclanc","text":"reclanc also supports tidymodels workflows:","code":"discrim_linear() |>   set_engine(\"clanc\", active = 5) |>   fit(class ~ ., data = form_data) #> parsnip model object #>  #> <clanc>  #> $centroids #>    class   gene expression pooled_sd active prior #> 1      A gene12   7.514718 0.4779155      5   0.5 #> 2      A  gene2   8.744821 0.3147537      5   0.5 #> 3      A gene13   8.936462 0.3418472      5   0.5 #> 4      A gene21   6.584681 0.5279636      5   0.5 #> 5      A gene24   4.307301 0.7214700      5   0.5 #> 6      A gene74   4.028507 0.4940783      5   0.5 #> 7      A gene41   4.328516 0.6317005      5   0.5 #> 8      A gene95   6.873184 0.4462475      5   0.5 #> 9      A gene52   3.743798 0.5173769      5   0.5 #> 10     A gene66   7.008174 0.5883218      5   0.5 #> 11     B gene12   8.072284 0.4779155      5   0.5 #> 12     B gene13   9.938137 0.3418472      5   0.5 #> 13     B  gene2   8.273987 0.3147537      5   0.5 #> 14     B gene24   3.370467 0.7214700      5   0.5 #> 15     B gene21   5.789423 0.5279636      5   0.5 #> 16     B gene41   5.518354 0.6317005      5   0.5 #> 17     B gene74   3.226598 0.4940783      5   0.5 #> 18     B gene52   2.438579 0.5173769      5   0.5 #> 19     B gene95   6.288173 0.4462475      5   0.5 #> 20     B gene66   7.891588 0.5883218      5   0.5"},{"path":"https://kaiaragaki.github.io/reclanc/articles/using-reclanc.html","id":"tall-inputs","dir":"Articles","previous_headings":"Fitting","what":"Tall inputs","title":"using-reclanc","text":"Tall inputs require genes rows samples columns","code":""},{"path":"https://kaiaragaki.github.io/reclanc/articles/using-reclanc.html","id":"data-framematrix","dir":"Articles","previous_headings":"Fitting > Tall inputs","what":"data.frame/matrix","title":"using-reclanc","text":"often convenient supply data.frame, particularly data-munging done. data.frame matrix inputs require expression genes column names sample IDs rownames, well factor vector classes:","code":"clanc(   synthetic_expression$expression,   classes = synthetic_expression$classes,   active = 5 ) #> <clanc>  #> $centroids #>    class   gene expression pooled_sd active prior #> 1      A gene12   7.514718 0.4779155      5   0.5 #> 2      A  gene2   8.744821 0.3147537      5   0.5 #> 3      A gene13   8.936462 0.3418472      5   0.5 #> 4      A gene21   6.584681 0.5279636      5   0.5 #> 5      A gene24   4.307301 0.7214700      5   0.5 #> 6      A gene74   4.028507 0.4940783      5   0.5 #> 7      A gene41   4.328516 0.6317005      5   0.5 #> 8      A gene95   6.873184 0.4462475      5   0.5 #> 9      A gene52   3.743798 0.5173769      5   0.5 #> 10     A gene66   7.008174 0.5883218      5   0.5 #> 11     B gene12   8.072284 0.4779155      5   0.5 #> 12     B gene13   9.938137 0.3418472      5   0.5 #> 13     B  gene2   8.273987 0.3147537      5   0.5 #> 14     B gene24   3.370467 0.7214700      5   0.5 #> 15     B gene21   5.789423 0.5279636      5   0.5 #> 16     B gene41   5.518354 0.6317005      5   0.5 #> 17     B gene74   3.226598 0.4940783      5   0.5 #> 18     B gene52   2.438579 0.5173769      5   0.5 #> 19     B gene95   6.288173 0.4462475      5   0.5 #> 20     B gene66   7.891588 0.5883218      5   0.5"},{"path":"https://kaiaragaki.github.io/reclanc/articles/using-reclanc.html","id":"summarizedexperiment","dir":"Articles","previous_headings":"Fitting > Tall inputs","what":"SummarizedExperiment","title":"using-reclanc","text":"common formats expression SummarizedExperiments ExpressionSets: can specify name colData (pData ExpressionSets) column contains classes classes argument:","code":"se <- SummarizedExperiment(   synthetic_expression$expression,   colData = DataFrame(class = synthetic_expression$classes) ) se #> class: SummarizedExperiment  #> dim: 100 12  #> metadata(0): #> assays(1): '' #> rownames(100): gene1 gene2 ... gene99 gene100 #> rowData names(0): #> colnames(12): sample1 sample2 ... sample11 sample12 #> colData names(1): class fit <- clanc(   se,   classes = \"class\",   active = 20,   assay = 1 # Index of assay - SummarizedExperiments only ) fit #> <clanc>  #> $centroids #>    class    gene expression pooled_sd active prior #> 1      A  gene11  2.2992343 1.2044848     20   0.5 #> 2      A   gene2  8.7448209 0.3147537     20   0.5 #> 3      A  gene13  8.9364621 0.3418472     20   0.5 #> 4      A  gene20  2.1925558 1.3104010     20   0.5 #> 5      A  gene10  4.9557850 0.8571716     20   0.5 #> 6      A  gene21  6.5846813 0.5279636     20   0.5 #> 7      A gene100  5.6455200 0.6175104     20   0.5 #> 8      A  gene22  6.1650079 0.4699756     20   0.5 #> 9      A  gene46  6.7344030 0.8233370     20   0.5 #> 10     A  gene24  4.3073008 0.7214700     20   0.5 #> 11     A  gene15  2.4254020 1.1910158     20   0.5 #> 12     A  gene25  5.0353875 0.7498139     20   0.5 #> 13     A  gene17  2.9424148 0.6628466     20   0.5 #> 14     A   gene4  4.3839026 0.7144711     20   0.5 #> 15     A  gene56  6.3441126 0.4078736     20   0.5 #> 16     A  gene41  4.3285163 0.6317005     20   0.5 #> 17     A  gene57  4.2237139 0.9531773     20   0.5 #> 18     A   gene7  5.5545202 0.7875124     20   0.5 #> 19     A  gene58  5.6162919 0.8161951     20   0.5 #> 20     A  gene12  7.5147181 0.4779155     20   0.5 #> 21     A   gene6  5.1534126 0.6194184     20   0.5 #> 22     A  gene51  6.6256136 0.7737520     20   0.5 #> 23     A  gene60  4.7434923 1.2945446     20   0.5 #> 24     A  gene52  3.7437977 0.5173769     20   0.5 #> 25     A  gene63  8.9293980 0.5635262     20   0.5 #> 26     A  gene53  4.3774614 0.8370528     20   0.5 #> 27     A  gene66  7.0081742 0.5883218     20   0.5 #> 28     A  gene83  3.6532038 0.8444393     20   0.5 #> 29     A  gene67  6.1384613 0.3677756     20   0.5 #> 30     A  gene85  5.2179679 0.5930857     20   0.5 #> 31     A  gene88  4.6008044 1.0603007     20   0.5 #> 32     A  gene70  1.3073340 1.1264747     20   0.5 #> 33     A  gene47  9.4528373 0.2030726     20   0.5 #> 34     A  gene90  0.9794695 1.3272423     20   0.5 #> 35     A  gene74  4.0285071 0.4940783     20   0.5 #> 36     A  gene94  7.7773183 0.5375914     20   0.5 #> 37     A  gene78  2.1763395 1.6805560     20   0.5 #> 38     A  gene95  6.8731844 0.4462475     20   0.5 #> 39     A  gene79  3.7138831 1.0587367     20   0.5 #> 40     A  gene98  4.5710407 0.6798799     20   0.5 #> 41     B  gene10  4.2378889 0.8571716     20   0.5 #> 42     B   gene2  8.2739866 0.3147537     20   0.5 #> 43     B gene100  5.0435040 0.6175104     20   0.5 #> 44     B  gene20  3.4781598 1.3104010     20   0.5 #> 45     B  gene46  7.0200767 0.8233370     20   0.5 #> 46     B  gene11  1.2780748 1.2044848     20   0.5 #> 47     B  gene12  8.0722841 0.4779155     20   0.5 #> 48     B  gene22  6.4609169 0.4699756     20   0.5 #> 49     B  gene51  5.8920005 0.7737520     20   0.5 #> 50     B  gene13  9.9381374 0.3418472     20   0.5 #> 51     B  gene15  1.6008569 1.1910158     20   0.5 #> 52     B  gene25  4.5015558 0.7498139     20   0.5 #> 53     B  gene17  2.5005839 0.6628466     20   0.5 #> 54     B   gene4  4.9225469 0.7144711     20   0.5 #> 55     B  gene56  6.1067832 0.4078736     20   0.5 #> 56     B  gene41  5.5183538 0.6317005     20   0.5 #> 57     B  gene57  3.1175271 0.9531773     20   0.5 #> 58     B   gene7  5.3367575 0.7875124     20   0.5 #> 59     B  gene21  5.7894231 0.5279636     20   0.5 #> 60     B  gene47  9.5903798 0.2030726     20   0.5 #> 61     B   gene6  4.4655748 0.6194184     20   0.5 #> 62     B  gene74  3.2265977 0.4940783     20   0.5 #> 63     B  gene24  3.3704670 0.7214700     20   0.5 #> 64     B  gene52  2.4385792 0.5173769     20   0.5 #> 65     B  gene63  8.3234317 0.5635262     20   0.5 #> 66     B  gene53  3.8479638 0.8370528     20   0.5 #> 67     B  gene66  7.8915875 0.5883218     20   0.5 #> 68     B  gene83  4.2757218 0.8444393     20   0.5 #> 69     B  gene67  6.0190764 0.3677756     20   0.5 #> 70     B  gene85  5.8877225 0.5930857     20   0.5 #> 71     B  gene79  4.1894417 1.0587367     20   0.5 #> 72     B  gene58  4.7194615 0.8161951     20   0.5 #> 73     B  gene88  5.5945405 1.0603007     20   0.5 #> 74     B  gene70  1.5987845 1.1264747     20   0.5 #> 75     B  gene90  1.4036889 1.3272423     20   0.5 #> 76     B  gene60  5.2336968 1.2945446     20   0.5 #> 77     B  gene78  1.6625207 1.6805560     20   0.5 #> 78     B  gene95  6.2881728 0.4462475     20   0.5 #> 79     B  gene98  4.1346296 0.6798799     20   0.5 #> 80     B  gene94  8.4222554 0.5375914     20   0.5"},{"path":"https://kaiaragaki.github.io/reclanc/articles/using-reclanc.html","id":"predicting","dir":"Articles","previous_headings":"","what":"Predicting","title":"using-reclanc","text":"fit can used predict classes new samples new data. new data can come form matrix, data.frame, SummarizedExperiment, ExpressionSet, expected input Using type = \"class\" predict classes using metric provided Alan Dabney original ClaNC paper. However, particularly comparing across datasets may transformed differently, may accurate use correlation based metric:","code":"predict(fit, new_data = se, type = \"class\") #> 40/40 (100%) genes in centroids found in data #> # A tibble: 12 × 1 #>    .pred_class #>    <fct>       #>  1 A           #>  2 A           #>  3 A           #>  4 A           #>  5 A           #>  6 A           #>  7 B           #>  8 B           #>  9 B           #> 10 B           #> 11 B           #> 12 B predict(fit, new_data = se, type = \"numeric\", method = \"spearman\") #> 40/40 (100%) genes in centroids found in data #> # A tibble: 12 × 2 #>    .pred_A .pred_B #>      <dbl>   <dbl> #>  1   0.901   0.811 #>  2   0.929   0.849 #>  3   0.932   0.840 #>  4   0.912   0.829 #>  5   0.862   0.770 #>  6   0.932   0.869 #>  7   0.776   0.904 #>  8   0.824   0.931 #>  9   0.828   0.924 #> 10   0.855   0.946 #> 11   0.805   0.915 #> 12   0.750   0.869"},{"path":"https://kaiaragaki.github.io/reclanc/authors.html","id":null,"dir":"","previous_headings":"","what":"Authors","title":"Authors and Citation","text":"Kai Aragaki. Author, maintainer. Alan Dabney. Author, copyright holder.           Original creator ClaNC","code":""},{"path":"https://kaiaragaki.github.io/reclanc/authors.html","id":"citation","dir":"","previous_headings":"","what":"Citation","title":"Authors and Citation","text":"Alan D (2005). “Classification microarrays nearest centroids.” Bioinformatics, 21(22), 4148-4154. doi:10.1093/bioinformatics/bti681.","code":"@Article{,   title = {Classification of microarrays to nearest centroids},   author = {Dabney Alan},   journal = {Bioinformatics},   year = {2005},   volume = {21},   number = {22},   pages = {4148-4154},   doi = {10.1093/bioinformatics/bti681}, }"},{"path":"https://kaiaragaki.github.io/reclanc/index.html","id":"reclanc","dir":"","previous_headings":"","what":"A Revival of the ClaNC Algorithm","title":"A Revival of the ClaNC Algorithm","text":"reclanc revival ClaNC (Classification microarrays nearest centroids), Alan R. Dabney. Since source lost (least knowledge), code comes heavy modification. reclanc nearest-centroid classifier expression data. tends little sensitive accurate similar models like PAM. Besides mere existence, reclanc differs slightly original ClaNC package ways: reclanc supports wide variety inputs (data.frame, matrix, formula, recipe, ExpressionSet, SummarizedExperiment) reclanc plays nicely tidymodels, offloads things like making folds rsample tuning tune (see vignette leverage tidymodels reclanc). Provides prediction method based correlation, rather distance - useful predicting classes data different sequencing modalities","code":""},{"path":"https://kaiaragaki.github.io/reclanc/index.html","id":"installation","dir":"","previous_headings":"","what":"Installation","title":"A Revival of the ClaNC Algorithm","text":"can install development version reclanc like :","code":"# install.packages(\"pak\") pak::pak(\"KaiAragaki/reclanc\")"},{"path":"https://kaiaragaki.github.io/reclanc/index.html","id":"how-to-use-it","dir":"","previous_headings":"","what":"How to use it","title":"A Revival of the ClaNC Algorithm","text":"information basic usage, see vignette. case study, well optimize active parameter, see vignette.","code":"library(reclanc) lapply(synthetic_expression, head) # dummy data #> $expression #>        sample1  sample2  sample3  sample4  sample5  sample6  sample7  sample8 #> gene1 8.097529 7.119188 7.304400 7.554689 7.953206 7.714925 7.512700 8.597547 #> gene2 8.641837 9.400416 8.500865 8.878687 8.318438 8.728683 7.812591 7.638167 #> gene3 3.436236 4.317915 3.435193 3.515755 3.024976 4.762209 5.048956 2.006646 #> gene4 4.368008 5.212750 4.618249 4.201365 3.195294 4.707750 5.126769 6.178658 #> gene5 2.423974 3.563816 4.062362 2.163278 2.021435 2.813873 0.000000 4.652358 #> gene6 5.371205 5.919809 4.366915 4.805534 4.834856 5.622157 3.883531 3.593082 #>        sample9 sample10 sample11 sample12 #> gene1 6.475641 7.648858 8.637526 7.345038 #> gene2 8.110285 7.906104 7.424728 7.927039 #> gene3 2.739211 3.111668 3.161077 4.306611 #> gene4 5.170265 4.259578 5.872855 6.159023 #> gene5 1.532242 3.399823 3.691250 1.932937 #> gene6 4.246205 4.637316 3.575837 2.730452 #>  #> $classes #> [1] A A A A A A #> Levels: A B centroids <- clanc(   synthetic_expression$expression,   classes = synthetic_expression$classes,   active = 5 ) centroids #> <clanc>  #> $centroids #>    class   gene expression pooled_sd active prior #> 1      A gene13   8.936462 0.3418472      5   0.5 #> 2      A gene21   7.379940 0.5279636      5   0.5 #> 3      A  gene2   8.744821 0.3147537      5   0.5 #> 4      A gene74   4.028507 0.4940783      5   0.5 #> 5      A gene41   4.328516 0.6317005      5   0.5 #> 6      A gene66   6.124761 0.5883218      5   0.5 #> 7      A gene24   4.307301 0.7214700      5   0.5 #> 8      A gene95   6.288173 0.4462475      5   0.5 #> 9      A gene94   7.777318 0.5375914      5   0.5 #> 10     A gene52   3.743798 0.5173769      5   0.5 #> 11     B gene13   9.938137 0.3418472      5   0.5 #> 12     B  gene2   8.273987 0.3147537      5   0.5 #> 13     B gene21   6.584681 0.5279636      5   0.5 #> 14     B gene41   5.518354 0.6317005      5   0.5 #> 15     B gene74   3.226598 0.4940783      5   0.5 #> 16     B gene24   3.370467 0.7214700      5   0.5 #> 17     B gene66   7.008174 0.5883218      5   0.5 #> 18     B gene94   8.422255 0.5375914      5   0.5 #> 19     B gene95   5.703161 0.4462475      5   0.5 #> 20     B gene52   2.438579 0.5173769      5   0.5"},{"path":"https://kaiaragaki.github.io/reclanc/index.html","id":"how-it-works","dir":"","previous_headings":"","what":"How it works","title":"A Revival of the ClaNC Algorithm","text":"can find gentle introduction reclanc works -depth statistically rigorous definition algorithm works original paper.","code":""},{"path":"https://kaiaragaki.github.io/reclanc/index.html","id":"citation","dir":"","previous_headings":"","what":"Citation","title":"A Revival of the ClaNC Algorithm","text":"Citation original ClaNC paper: Alan R. Dabney, Classification microarrays nearest centroids, Bioinformatics, Volume 21, Issue 22, November 2005, Pages 4148–4154, https://doi.org/10.1093/bioinformatics/bti681","code":""},{"path":"https://kaiaragaki.github.io/reclanc/reference/clanc.html","id":null,"dir":"Reference","previous_headings":"","what":"Calculate centroids from expression data with ClaNC — clanc","title":"Calculate centroids from expression data with ClaNC — clanc","text":"Calculate centroids expression data ClaNC","code":""},{"path":"https://kaiaragaki.github.io/reclanc/reference/clanc.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Calculate centroids from expression data with ClaNC — clanc","text":"","code":"clanc(x, ...)  # Default S3 method clanc(x, ...)  # S3 method for class 'data.frame' clanc(x, classes, active, priors = \"equal\", ...)  # S3 method for class 'matrix' clanc(x, classes, active, priors = \"equal\", ...)  # S3 method for class 'SummarizedExperiment' clanc(x, classes, active, priors = \"equal\", assay = 1, ...)  # S3 method for class 'ExpressionSet' clanc(x, classes, active, priors = \"equal\", ...)  # S3 method for class 'formula' clanc(formula, data, active, priors = \"equal\", ...)  # S3 method for class 'recipe' clanc(x, data, active, priors = \"equal\", ...)"},{"path":"https://kaiaragaki.github.io/reclanc/reference/clanc.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Calculate centroids from expression data with ClaNC — clanc","text":"x Depending context: data frame expression. matrix expression. recipe specifying set preprocessing steps created recipes::recipe(). ExpressionSet. SummarizedExperiment assay containing expression. Expression library-size corrected, scaled. supplying data frame, matrix, ExpressionSet, SummarizedExperiment, rows represent genes, columns represent samples (standard expression data). column names sample IDs, row names gene IDs. recipe provided, data genes columns (match formula provided recipe.) ... currently used, required extensibility. classes x data frame matrix, class contains class labels form either: data frame 1 factor column factor vector. x ExpressionSet SummarizedExperiment, class name column pData(x) colData(x) contains classes factor. active Either single number numeric vector equal length number unique class labels. Represents number class-specific genes selected centroid. Note different numbers genes can selected class. See details. x ExpressionSet SummarizedExperiment, active can additionally name column pData(x) colData(x) contains numeric vector priors Can take variety values: \"equal\" - class equal prior \"class\" - class prior equal frequency training set numeric vector length equal number classes x ExpressionSet SummarizedExperiment, active can additionally name column pData(x) colData(x) contains numeric vector assay SummarizedExperiment used, index name assay formula formula specifying classes left-hand side, predictor terms right-hand side. data recipe formula used, data specified : data frame containing expression classes, columns genes class, rows samples.","code":""},{"path":"https://kaiaragaki.github.io/reclanc/reference/clanc.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Calculate centroids from expression data with ClaNC — clanc","text":"clanc object.","code":""},{"path":"https://kaiaragaki.github.io/reclanc/reference/clanc.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Calculate centroids from expression data with ClaNC — clanc","text":"original description ClaNC can found active sets number class-specific genes, centroid number genes. explain way example, active = 5 3 classes, centroid 15 genes, 5 genes particular given class. genes 'active' class, values mean class. genes active given class, values overall expression given gene across classes.","code":""},{"path":"https://kaiaragaki.github.io/reclanc/reference/clanc.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Calculate centroids from expression data with ClaNC — clanc","text":"","code":"expression_matrix <- synthetic_expression$expression head(expression_matrix) #>        sample1  sample2  sample3  sample4  sample5  sample6  sample7  sample8 #> gene1 8.097529 7.119188 7.304400 7.554689 7.953206 7.714925 7.512700 8.597547 #> gene2 8.641837 9.400416 8.500865 8.878687 8.318438 8.728683 7.812591 7.638167 #> gene3 3.436236 4.317915 3.435193 3.515755 3.024976 4.762209 5.048956 2.006646 #> gene4 4.368008 5.212750 4.618249 4.201365 3.195294 4.707750 5.126769 6.178658 #> gene5 2.423974 3.563816 4.062362 2.163278 2.021435 2.813873 0.000000 4.652358 #> gene6 5.371205 5.919809 4.366915 4.805534 4.834856 5.622157 3.883531 3.593082 #>        sample9 sample10 sample11 sample12 #> gene1 6.475641 7.648858 8.637526 7.345038 #> gene2 8.110285 7.906104 7.424728 7.927039 #> gene3 2.739211 3.111668 3.161077 4.306611 #> gene4 5.170265 4.259578 5.872855 6.159023 #> gene5 1.532242 3.399823 3.691250 1.932937 #> gene6 4.246205 4.637316 3.575837 2.730452 classes <- synthetic_expression$classes classes #>  [1] A A A A A A B B B B B B #> Levels: A B  # data.frame/tibble/matrix interface:  clanc(expression_matrix, classes = classes, active = 5, priors = \"equal\") #> <clanc>  #> $centroids #>    class   gene expression pooled_sd active prior #> 1      A gene12   7.514718 0.4779155      5   0.5 #> 2      A  gene2   8.744821 0.3147537      5   0.5 #> 3      A gene13   8.936462 0.3418472      5   0.5 #> 4      A gene21   6.584681 0.5279636      5   0.5 #> 5      A gene24   4.307301 0.7214700      5   0.5 #> 6      A gene74   4.028507 0.4940783      5   0.5 #> 7      A gene41   4.328516 0.6317005      5   0.5 #> 8      A gene95   6.873184 0.4462475      5   0.5 #> 9      A gene52   3.743798 0.5173769      5   0.5 #> 10     A gene66   7.008174 0.5883218      5   0.5 #> 11     B gene12   8.072284 0.4779155      5   0.5 #> 12     B gene13   9.938137 0.3418472      5   0.5 #> 13     B  gene2   8.273987 0.3147537      5   0.5 #> 14     B gene24   3.370467 0.7214700      5   0.5 #> 15     B gene21   5.789423 0.5279636      5   0.5 #> 16     B gene41   5.518354 0.6317005      5   0.5 #> 17     B gene74   3.226598 0.4940783      5   0.5 #> 18     B gene52   2.438579 0.5173769      5   0.5 #> 19     B gene95   6.288173 0.4462475      5   0.5 #> 20     B gene66   7.891588 0.5883218      5   0.5 #>   # Formula interface:  # Data must have class included as a column # Genes must be *columns* and samples must be *rows* # Hence the data transposition. for_formula <- data.frame(class = classes, t(expression_matrix))  clanc(class ~ ., for_formula, active = 5, priors = \"equal\") #> <clanc>  #> $centroids #>    class   gene expression pooled_sd active prior #> 1      A gene12   7.514718 0.4779155      5   0.5 #> 2      A  gene2   8.744821 0.3147537      5   0.5 #> 3      A gene13   8.936462 0.3418472      5   0.5 #> 4      A gene21   6.584681 0.5279636      5   0.5 #> 5      A gene24   4.307301 0.7214700      5   0.5 #> 6      A gene74   4.028507 0.4940783      5   0.5 #> 7      A gene41   4.328516 0.6317005      5   0.5 #> 8      A gene95   6.873184 0.4462475      5   0.5 #> 9      A gene52   3.743798 0.5173769      5   0.5 #> 10     A gene66   7.008174 0.5883218      5   0.5 #> 11     B gene12   8.072284 0.4779155      5   0.5 #> 12     B gene13   9.938137 0.3418472      5   0.5 #> 13     B  gene2   8.273987 0.3147537      5   0.5 #> 14     B gene24   3.370467 0.7214700      5   0.5 #> 15     B gene21   5.789423 0.5279636      5   0.5 #> 16     B gene41   5.518354 0.6317005      5   0.5 #> 17     B gene74   3.226598 0.4940783      5   0.5 #> 18     B gene52   2.438579 0.5173769      5   0.5 #> 19     B gene95   6.288173 0.4462475      5   0.5 #> 20     B gene66   7.891588 0.5883218      5   0.5 #>    # Recipes interface:  rec <- recipes::recipe(class ~ ., data = for_formula)  clanc(rec, for_formula, active = 5, priors = \"equal\") #> <clanc>  #> $centroids #>    class   gene expression pooled_sd active prior #> 1      A gene12   7.514718 0.4779155      5   0.5 #> 2      A  gene2   8.744821 0.3147537      5   0.5 #> 3      A gene13   8.936462 0.3418472      5   0.5 #> 4      A gene21   6.584681 0.5279636      5   0.5 #> 5      A gene24   4.307301 0.7214700      5   0.5 #> 6      A gene74   4.028507 0.4940783      5   0.5 #> 7      A gene41   4.328516 0.6317005      5   0.5 #> 8      A gene95   6.873184 0.4462475      5   0.5 #> 9      A gene52   3.743798 0.5173769      5   0.5 #> 10     A gene66   7.008174 0.5883218      5   0.5 #> 11     B gene12   8.072284 0.4779155      5   0.5 #> 12     B gene13   9.938137 0.3418472      5   0.5 #> 13     B  gene2   8.273987 0.3147537      5   0.5 #> 14     B gene24   3.370467 0.7214700      5   0.5 #> 15     B gene21   5.789423 0.5279636      5   0.5 #> 16     B gene41   5.518354 0.6317005      5   0.5 #> 17     B gene74   3.226598 0.4940783      5   0.5 #> 18     B gene52   2.438579 0.5173769      5   0.5 #> 19     B gene95   6.288173 0.4462475      5   0.5 #> 20     B gene66   7.891588 0.5883218      5   0.5 #>   # SummarizedExperiment interface: se <- SummarizedExperiment::SummarizedExperiment(   expression_matrix,   colData = data.frame(     class = classes,     active = 5,     prior = c(0.5, 0.5)   ) )  clanc(se, classes = \"class\", active = \"active\", priors = \"equal\") #> <clanc>  #> $centroids #>    class   gene expression pooled_sd active prior #> 1      A gene12   7.514718 0.4779155      5   0.5 #> 2      A  gene2   8.744821 0.3147537      5   0.5 #> 3      A gene13   8.936462 0.3418472      5   0.5 #> 4      A gene21   6.584681 0.5279636      5   0.5 #> 5      A gene24   4.307301 0.7214700      5   0.5 #> 6      A gene74   4.028507 0.4940783      5   0.5 #> 7      A gene41   4.328516 0.6317005      5   0.5 #> 8      A gene95   6.873184 0.4462475      5   0.5 #> 9      A gene52   3.743798 0.5173769      5   0.5 #> 10     A gene66   7.008174 0.5883218      5   0.5 #> 11     B gene12   8.072284 0.4779155      5   0.5 #> 12     B gene13   9.938137 0.3418472      5   0.5 #> 13     B  gene2   8.273987 0.3147537      5   0.5 #> 14     B gene24   3.370467 0.7214700      5   0.5 #> 15     B gene21   5.789423 0.5279636      5   0.5 #> 16     B gene41   5.518354 0.6317005      5   0.5 #> 17     B gene74   3.226598 0.4940783      5   0.5 #> 18     B gene52   2.438579 0.5173769      5   0.5 #> 19     B gene95   6.288173 0.4462475      5   0.5 #> 20     B gene66   7.891588 0.5883218      5   0.5 #>   # ExpressionSet interface: adf <- data.frame(   row.names = colnames(expression_matrix),   class = classes ) |>   Biobase::AnnotatedDataFrame()  es <- Biobase::ExpressionSet(expression_matrix, adf) clanc(es, classes = \"class\", active = 5, priors = 0.5) #> <clanc>  #> $centroids #>    class   gene expression pooled_sd active prior #> 1      A gene12   7.514718 0.4779155      5   0.5 #> 2      A  gene2   8.744821 0.3147537      5   0.5 #> 3      A gene13   8.936462 0.3418472      5   0.5 #> 4      A gene21   6.584681 0.5279636      5   0.5 #> 5      A gene24   4.307301 0.7214700      5   0.5 #> 6      A gene74   4.028507 0.4940783      5   0.5 #> 7      A gene41   4.328516 0.6317005      5   0.5 #> 8      A gene95   6.873184 0.4462475      5   0.5 #> 9      A gene52   3.743798 0.5173769      5   0.5 #> 10     A gene66   7.008174 0.5883218      5   0.5 #> 11     B gene12   8.072284 0.4779155      5   0.5 #> 12     B gene13   9.938137 0.3418472      5   0.5 #> 13     B  gene2   8.273987 0.3147537      5   0.5 #> 14     B gene24   3.370467 0.7214700      5   0.5 #> 15     B gene21   5.789423 0.5279636      5   0.5 #> 16     B gene41   5.518354 0.6317005      5   0.5 #> 17     B gene74   3.226598 0.4940783      5   0.5 #> 18     B gene52   2.438579 0.5173769      5   0.5 #> 19     B gene95   6.288173 0.4462475      5   0.5 #> 20     B gene66   7.891588 0.5883218      5   0.5 #>"},{"path":"https://kaiaragaki.github.io/reclanc/reference/predict.clanc.html","id":null,"dir":"Reference","previous_headings":"","what":"Predict from a clanc — predict.clanc","title":"Predict from a clanc — predict.clanc","text":"Predict clanc","code":""},{"path":"https://kaiaragaki.github.io/reclanc/reference/predict.clanc.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Predict from a clanc — predict.clanc","text":"","code":"# S3 method for class 'clanc' predict(object, new_data, type, assay = NULL, format = c(\"wide\", \"tall\"), ...)"},{"path":"https://kaiaragaki.github.io/reclanc/reference/predict.clanc.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Predict from a clanc — predict.clanc","text":"object clanc object. new_data data frame matrix new predictors. type single character. type predictions generate. Valid options : \"numeric\" numeric predictions. assay object inherits SummarizedExperiment, index assay. ... used, required extensibility. method type numeric, method correlation format. Character. data \"wide\" (default), genes columns, \"tall\", genes rows?","code":""},{"path":"https://kaiaragaki.github.io/reclanc/reference/predict.clanc.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Predict from a clanc — predict.clanc","text":"tibble predictions. number rows tibble guaranteed number rows new_data.","code":""},{"path":"https://kaiaragaki.github.io/reclanc/reference/synthetic_expression.html","id":null,"dir":"Reference","previous_headings":"","what":"Synthetic Expression of Two Distinct Classes — synthetic_expression","title":"Synthetic Expression of Two Distinct Classes — synthetic_expression","text":"Synthetic Expression Two Distinct Classes","code":""},{"path":"https://kaiaragaki.github.io/reclanc/reference/synthetic_expression.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Synthetic Expression of Two Distinct Classes — synthetic_expression","text":"","code":"synthetic_expression"},{"path":[]},{"path":"https://kaiaragaki.github.io/reclanc/reference/synthetic_expression.html","id":"synthetic-expression","dir":"Reference","previous_headings":"","what":"synthetic_expression","title":"Synthetic Expression of Two Distinct Classes — synthetic_expression","text":"list containing two items: expression Normalized log expression 12 samples across 100 genes classes factor vector classes 12 samples","code":""}]
+[{"path":"https://kaiaragaki.github.io/reclanc/CONTRIBUTING.html","id":null,"dir":"","previous_headings":"","what":"Contributing to reclanc","title":"Contributing to reclanc","text":"outlines propose change reclanc.","code":""},{"path":"https://kaiaragaki.github.io/reclanc/CONTRIBUTING.html","id":"fixing-typos","dir":"","previous_headings":"","what":"Fixing typos","title":"Contributing to reclanc","text":"can fix typos, spelling mistakes, grammatical errors documentation directly using GitHub web interface, long changes made source file. generally means ’ll need edit roxygen2 comments .R, .Rd file. can find .R file generates .Rd reading comment first line.","code":""},{"path":"https://kaiaragaki.github.io/reclanc/CONTRIBUTING.html","id":"bigger-changes","dir":"","previous_headings":"","what":"Bigger changes","title":"Contributing to reclanc","text":"want make bigger change, ’s good idea first file issue make sure someone team agrees ’s needed. ’ve found bug, please file issue illustrates bug minimal reprex (also help write unit test, needed). See guide create great issue advice.","code":""},{"path":"https://kaiaragaki.github.io/reclanc/CONTRIBUTING.html","id":"pull-request-process","dir":"","previous_headings":"Bigger changes","what":"Pull request process","title":"Contributing to reclanc","text":"Fork package clone onto computer. haven’t done , recommend using usethis::create_from_github(\"KaiAragaki/reclanc\", fork = TRUE). Install development dependencies devtools::install_dev_deps(), make sure package passes R CMD check running devtools::check(). R CMD check doesn’t pass cleanly, ’s good idea ask help continuing. Create Git branch pull request (PR). recommend using usethis::pr_init(\"brief-description--change\"). Make changes, commit git, create PR running usethis::pr_push(), following prompts browser. title PR briefly describe change. body PR contain Fixes #issue-number. user-facing changes, add bullet top NEWS.md (.e. just first header). Follow style described https://style.tidyverse.org/news.html.","code":""},{"path":"https://kaiaragaki.github.io/reclanc/CONTRIBUTING.html","id":"code-style","dir":"","previous_headings":"Bigger changes","what":"Code style","title":"Contributing to reclanc","text":"New code follow tidyverse style guide. can use styler package apply styles, please don’t restyle code nothing PR. use roxygen2, Markdown syntax, documentation. use testthat unit tests. Contributions test cases included easier accept.","code":""},{"path":"https://kaiaragaki.github.io/reclanc/CONTRIBUTING.html","id":"code-of-conduct","dir":"","previous_headings":"","what":"Code of Conduct","title":"Contributing to reclanc","text":"Please note reclanc project released Contributor Code Conduct. contributing project agree abide terms.","code":""},{"path":"https://kaiaragaki.github.io/reclanc/LICENSE.html","id":null,"dir":"","previous_headings":"","what":"MIT License","title":"MIT License","text":"Copyright (c) 2024 reclanc authors Permission hereby granted, free charge, person obtaining copy software associated documentation files (“Software”), deal Software without restriction, including without limitation rights use, copy, modify, merge, publish, distribute, sublicense, /sell copies Software, permit persons Software furnished , subject following conditions: copyright notice permission notice shall included copies substantial portions Software. SOFTWARE PROVIDED “”, WITHOUT WARRANTY KIND, EXPRESS IMPLIED, INCLUDING LIMITED WARRANTIES MERCHANTABILITY, FITNESS PARTICULAR PURPOSE NONINFRINGEMENT. EVENT SHALL AUTHORS COPYRIGHT HOLDERS LIABLE CLAIM, DAMAGES LIABILITY, WHETHER ACTION CONTRACT, TORT OTHERWISE, ARISING , CONNECTION SOFTWARE USE DEALINGS SOFTWARE.","code":""},{"path":"https://kaiaragaki.github.io/reclanc/articles/case-study.html","id":"introduction","dir":"Articles","previous_headings":"","what":"Introduction","title":"case-study","text":"Let’s consider relatively full-featured, practical use case reclanc. vignette, ’ll go basics fitting models, well leverage tidymodels elaborate things like resampling tuning hyperparameters. ’ll fit final model, use predict subtypes entirely new dataset. vignette tries assume little knowledge machine learning tidymodels.","code":""},{"path":[]},{"path":"https://kaiaragaki.github.io/reclanc/articles/case-study.html","id":"a-simple-fit","dir":"Articles","previous_headings":"Fitting","what":"A simple fit","title":"case-study","text":"Let’s start fitting procedure. first need gene expression data. data ’m using Sjödahl et al. (2012). contains RNA expression 308 bladder cancer tumors. paper, Sjödahl et al. used transcriptional data classify tumors seven molecular subtypes (MS): ’d like apply subtype framework datasets. , first need generate centroids. can begin, though, need convert outcomes factors. case, outcomes molecular subtypes: simplest form, since clanc accepts ExpressionSet objects, following done : problem method, though, idea good fit . active argument specifies number genes used distinguishing features given class. case, class find 5 genes expression patterns peculiar given molecular subtype, subtype 7 (total number subtypes) x 5 (number active genes) = 35 genes (see blog post - better yet - original paper details). gotten better fit genes? selecting genes need? know?","code":"lund <- s3readRDS(\"lund.rds\", \"reclanc-lund\", region = \"us-east-2\") lund #> ExpressionSet (storageMode: lockedEnvironment) #> assayData: 16940 features, 308 samples  #>   element names: exprs  #> protocolData: none #> phenoData #>   sampleNames: UC_0001_1 UC_0002_1 ... UC_0785_1 (308 total) #>   varLabels: title source ... sample (16 total) #>   varMetadata: labelDescription #> featureData: none #> experimentData: use 'experimentData(object)' #> Annotation: table(lund$molecular_subtype) #>  #>    MS1a    MS1b  MS2a.1  MS2a.2  MS2b.1 MS2b2.1 MS2b2.2  #>      53      78      30      55      43      20      29 lund$molecular_subtype <- factor(lund$molecular_subtype) simple_centroids <- clanc(lund, classes = \"molecular_subtype\", active = 5) head(simple_centroids$centroids) #>   class    gene expression pooled_sd active     prior #> 1  MS1a   CXCL1   6.534490 0.8749133      5 0.1428571 #> 2  MS1a     MMD   7.922508 0.6429620      5 0.1428571 #> 3  MS1a C9orf19   8.378910 0.7510552      5 0.1428571 #> 4  MS1a    BNC1   5.297095 0.2106762      5 0.1428571 #> 5  MS1a  SLFN11   7.362887 0.6824663      5 0.1428571 #> 6  MS1a    CRAT   6.004517 0.3425669      5 0.1428571"},{"path":"https://kaiaragaki.github.io/reclanc/articles/case-study.html","id":"setting-the-stage-for-more-elaborate-analyses","dir":"Articles","previous_headings":"Fitting","what":"Setting the stage for more elaborate analyses","title":"case-study","text":"can get started tackling larger questions, let’s take brief detour land tidymodels. tidymodels collection packages make running tuning algorithms like much less painful much standardized. order leverage tidymodels, need buy-data structures. (Aside: don’t mean make buy-sound begrudging. say need, really mean : ’re going specifying long formulas, reason R really, really hates. Emil Hvitfeldt recently (time writing) allowed tidymodels handle long formulas gracefully, using tidymodels infrastructure gift, chore.) Many tidymodels workflows begin model specification. rationale behind separate model specification step model fitting step (whereas base R, generally happen ). reclanc makes easy specify model adding custom engine parsnip::discrim_linear, specifying model looks like : mod doesn’t anything - ’s kind point: specifies model later fit , doesn’t fitting . allows us reuse specification across code. next step wrangle data bit ‘wide’ format, columns outcomes (classes) predictors (genes), rows observations (samples): Finally, specify formula fitting model. uses recipes package tidymodels. delightful package can help preprocess data, ’s scope vignette. Instead, just think way specify formula keeps R blowing : can bundle model specification (mod) preprocessing steps (recipe, just formula) workflow: Now can fit model: ’ll notice results saw previously, demonstrating ’re using tidymodels rather base R, ’re still thing.","code":"library(tidymodels) mod <- discrim_linear() |>   set_engine(     engine = \"clanc\", # Note: \"clanc\", not \"reclanc\"     active = 5   ) wrangled <- data.frame(class = lund$molecular_subtype, t(exprs(lund))) head(wrangled[1:5]) #>            class LOC23117   FCGR2B    TRIM44 C15orf39 #> UC_0001_1   MS1b 5.565262 5.306654  9.305053 6.430063 #> UC_0002_1 MS2b.1 5.505854 5.731128  9.242790 7.265748 #> UC_0003_1 MS2a.2 5.336140 5.540470  9.888668 7.244976 #> UC_0006_2 MS2b.1 5.576748 5.847743  9.408895 7.377358 #> UC_0007_1 MS2a.2 5.414919 5.510507 10.482469 6.435552 #> UC_0008_1 MS2b.1 5.279174 5.633093  9.112754 7.057977 # Note that the recipe requires 'template data' recipe <- recipe(class ~ ., wrangled) wf <- workflow() |>   add_recipe(recipe) |>   add_model(mod) wf #> ══ Workflow ════════════════════════════════════════════════════════════════════ #> Preprocessor: Recipe #> Model: discrim_linear() #>  #> ── Preprocessor ──────────────────────────────────────────────────────────────── #> 0 Recipe Steps #>  #> ── Model ─────────────────────────────────────────────────────────────────────── #> Linear Discriminant Model Specification (classification) #>  #> Engine-Specific Arguments: #>   active = 5 #>  #> Computational engine: clanc tidymodels_fit <- fit(wf, data = wrangled) head(extract_fit_parsnip(tidymodels_fit)$fit$centroids) #>   class    gene expression pooled_sd active     prior #> 1  MS1a   CXCL1   6.534490 0.8749133      5 0.1428571 #> 2  MS1a     MMD   7.922508 0.6429620      5 0.1428571 #> 3  MS1a C9orf19   8.378910 0.7510552      5 0.1428571 #> 4  MS1a    BNC1   5.297095 0.2106762      5 0.1428571 #> 5  MS1a  SLFN11   7.362887 0.6824663      5 0.1428571 #> 6  MS1a    CRAT   6.004517 0.3425669      5 0.1428571"},{"path":"https://kaiaragaki.github.io/reclanc/articles/case-study.html","id":"measuring-fit-accuracy-with-cross-validation","dir":"Articles","previous_headings":"Fitting","what":"Measuring fit accuracy with cross-validation","title":"case-study","text":"Now ’ve dialed tidymodels framework, can lot elaborate things ease. One concerns whether 5 active genes good choice (active = 5). somewhat simple way determine good choice 5 genes use cross-validation. Cross-validation allows us test good fit training model , say, 80% data, testing rest (see Wikipedia diagram k-fold cross validation). allows us get measure good fit , without break actual test data - general used ’re ready finalize model. Speaking test data, let’s go ahead split now. ’ll lock test data away use ’ve fit final model. , ’ll use cross validation assess good fit , essentially using training data testing data. course, tidymodels makes easy , using rsample::initial_split: train test just subsets original data, containing 80% 20% original data (respectively). also tries maintain relative proportions classes within datasets (set strata = class): Creating folds cross validation nearly initial_split: can reuse workflow wf, contains model formula. difference use fit_resamples, specify metric want use measure good fit (remember every fold chunk data uses test fit). simplicity, let’s use accuracy: can extract accuracy metrics using collect_metrics, roots around fits helpfully extracts metrics, aggregates , calculated standard error: model accuracy 74%. Applying model testing data: Note testing data accuracy (%) approximates training data accuracy (74%).","code":"set.seed(123) splits <- initial_split(wrangled, prop = 0.8, strata = class) train <- training(splits) test <- testing(splits) round(prop.table(table(train$class)), 2) #>  #>    MS1a    MS1b  MS2a.1  MS2a.2  MS2b.1 MS2b2.1 MS2b2.2  #>    0.17    0.25    0.10    0.18    0.15    0.07    0.08 round(prop.table(table(test$class)), 2) #>  #>    MS1a    MS1b  MS2a.1  MS2a.2  MS2b.1 MS2b2.1 MS2b2.2  #>    0.19    0.27    0.08    0.16    0.11    0.05    0.16 folds <- vfold_cv(train, v = 5, strata = class) folds #> #  5-fold cross-validation using stratification  #> # A tibble: 5 × 2 #>   splits           id    #>   <list>           <chr> #> 1 <split [193/51]> Fold1 #> 2 <split [193/51]> Fold2 #> 3 <split [195/49]> Fold3 #> 4 <split [197/47]> Fold4 #> 5 <split [198/46]> Fold5 fits <- fit_resamples(   wf,   folds,   metrics = metric_set(accuracy) ) #> 35/35 (100%) genes in centroids found in data #> 35/35 (100%) genes in centroids found in data #> 35/35 (100%) genes in centroids found in data #> 35/35 (100%) genes in centroids found in data #> 35/35 (100%) genes in centroids found in data fits #> # Resampling results #> # 5-fold cross-validation using stratification  #> # A tibble: 5 × 4 #>   splits           id    .metrics         .notes           #>   <list>           <chr> <list>           <list>           #> 1 <split [193/51]> Fold1 <tibble [1 × 4]> <tibble [0 × 3]> #> 2 <split [193/51]> Fold2 <tibble [1 × 4]> <tibble [0 × 3]> #> 3 <split [195/49]> Fold3 <tibble [1 × 4]> <tibble [0 × 3]> #> 4 <split [197/47]> Fold4 <tibble [1 × 4]> <tibble [0 × 3]> #> 5 <split [198/46]> Fold5 <tibble [1 × 4]> <tibble [0 × 3]> metrics <- collect_metrics(fits) metrics #> # A tibble: 1 × 6 #>   .metric  .estimator  mean     n std_err .config              #>   <chr>    <chr>      <dbl> <int>   <dbl> <chr>                #> 1 accuracy multiclass 0.737     5  0.0289 Preprocessor1_Model1 # Fit a model using *all* of our training data final_fit <- clanc(class ~ ., train, active = 5)  # Use it to predict the (known) classes of our test data preds <- predict(final_fit, new_data = test, type = \"class\") #> 35/35 (100%) genes in centroids found in data w_preds <- cbind(preds, test) # Compare known class vs predicted class metric <- accuracy(w_preds, class, .pred_class) metric #> # A tibble: 1 × 3 #>   .metric  .estimator .estimate #>   <chr>    <chr>          <dbl> #> 1 accuracy multiclass     0.734"},{"path":"https://kaiaragaki.github.io/reclanc/articles/case-study.html","id":"tuning-hyperparameters-with-tune","dir":"Articles","previous_headings":"Fitting","what":"Tuning hyperparameters with tune","title":"case-study","text":"Now least measure good model fits, better genes? get away fewer? Running command different numbers drag - fortunately, ’s yet another beautiful package help us: tune. use tune, need re-specify model let tune know parameters want tune: update previous workflow using update_model, let’s just declare new one: specify range values active try: can fit folds using spread values chose: , can collect metrics - time, however, summary metrics values active: graphically:  looks like read maximal accuracy around 21 genes - let’s choose 20 genes nice round number: looks like accuracy little better now ’ve chosen optimal number active genes.","code":"tune_mod <- discrim_linear() |>   set_engine(     engine = \"clanc\",     active = tune()   ) tune_wf <- workflow() |>   add_recipe(recipe) |>   add_model(tune_mod) values <- data.frame(active = seq(from = 1, to = 50, by = 4)) values #>    active #> 1       1 #> 2       5 #> 3       9 #> 4      13 #> 5      17 #> 6      21 #> 7      25 #> 8      29 #> 9      33 #> 10     37 #> 11     41 #> 12     45 #> 13     49 # This is going to take some time, since we're fitting 5 folds 13 times each. tuned <- tune_grid(   tune_wf,   folds,   metrics = metric_set(accuracy),   grid = values ) tuned #> # Tuning results #> # 5-fold cross-validation using stratification  #> # A tibble: 5 × 4 #>   splits           id    .metrics          .notes           #>   <list>           <chr> <list>            <list>           #> 1 <split [193/51]> Fold1 <tibble [13 × 5]> <tibble [0 × 3]> #> 2 <split [193/51]> Fold2 <tibble [13 × 5]> <tibble [0 × 3]> #> 3 <split [195/49]> Fold3 <tibble [13 × 5]> <tibble [0 × 3]> #> 4 <split [197/47]> Fold4 <tibble [13 × 5]> <tibble [0 × 3]> #> 5 <split [198/46]> Fold5 <tibble [13 × 5]> <tibble [0 × 3]> tuned_metrics <- collect_metrics(tuned) tuned_metrics #> # A tibble: 13 × 7 #>    active .metric  .estimator  mean     n std_err .config               #>     <dbl> <chr>    <chr>      <dbl> <int>   <dbl> <chr>                 #>  1      1 accuracy multiclass 0.585     5  0.0368 Preprocessor1_Model01 #>  2      5 accuracy multiclass 0.737     5  0.0289 Preprocessor1_Model02 #>  3      9 accuracy multiclass 0.748     5  0.0496 Preprocessor1_Model03 #>  4     13 accuracy multiclass 0.781     5  0.0403 Preprocessor1_Model04 #>  5     17 accuracy multiclass 0.770     5  0.0280 Preprocessor1_Model05 #>  6     21 accuracy multiclass 0.774     5  0.0335 Preprocessor1_Model06 #>  7     25 accuracy multiclass 0.785     5  0.0378 Preprocessor1_Model07 #>  8     29 accuracy multiclass 0.794     5  0.0319 Preprocessor1_Model08 #>  9     33 accuracy multiclass 0.773     5  0.0281 Preprocessor1_Model09 #> 10     37 accuracy multiclass 0.790     5  0.0295 Preprocessor1_Model10 #> 11     41 accuracy multiclass 0.794     5  0.0339 Preprocessor1_Model11 #> 12     45 accuracy multiclass 0.815     5  0.0267 Preprocessor1_Model12 #> 13     49 accuracy multiclass 0.815     5  0.0277 Preprocessor1_Model13 ggplot(tuned_metrics, aes(active, mean)) +   geom_line() +   coord_cartesian(ylim = c(0, 1)) +   labs(x = \"Number Active Genes\", y = \"Accuracy\") final_fit_tuned <- clanc(class ~ ., data = train, active = 20) # Use it to predict the (known) classes of our test data: preds <- predict(final_fit_tuned, new_data = test, type = \"class\") #> 140/140 (100%) genes in centroids found in data w_preds <- cbind(preds, test) # Compare known class vs predicted class: metric <- accuracy(w_preds, class, .pred_class) metric #> # A tibble: 1 × 3 #>   .metric  .estimator .estimate #>   <chr>    <chr>          <dbl> #> 1 accuracy multiclass     0.812"},{"path":"https://kaiaragaki.github.io/reclanc/articles/case-study.html","id":"predicting","dir":"Articles","previous_headings":"","what":"Predicting","title":"case-study","text":"Now want apply classifier new data. second dataset RNAseq data 30 bladder cancer cell lines: Predicting incredibly simple. Since ’re using different sequencing method (RNAseq vs array-based sequencing), probably makes sense use correlation based classification rather original distance-based metric used original ClaNC package. can specifying type = \"numeric\" whatever correlation method prefer.  Sjödahl paper, seven subtypes simplified five subtypes merging two similar biological pathways activated. ease interpretation, can :","code":"library(cellebrate) cell_rna #> class: DESeqDataSet  #> dim: 18548 30  #> metadata(1): version #> assays(2): counts rlog_norm_counts #> rownames(18548): TSPAN6 TNMD ... MT-ND5 MT-ND6 #> rowData names(0): #> colnames(30): 1A6 253JP ... UC7 UC9 #> colData names(5): cell bsl lum call clade cell_preds <- predict(   final_fit_tuned,   cell_rna,   assay = 2,   type = \"numeric\",   method = \"spearman\" ) #> 118/140 (84%) genes in centroids found in data  out <- cbind(colData(cell_rna), cell_preds) |>   as_tibble()  out #> # A tibble: 30 × 12 #>    cell     bsl    lum call  clade            .pred_MS1a .pred_MS1b .pred_MS2a.1 #>    <chr>  <dbl>  <dbl> <chr> <fct>                 <dbl>      <dbl>        <dbl> #>  1 1A6     99.0   1.02 BSL   Epithelial Other     0.0600      0.224        0.149 #>  2 253JP   76.6  23.4  BSL   Unknown              0.0574      0.240        0.219 #>  3 5637    98.5   1.46 BSL   Epithelial Other     0.0958      0.243        0.160 #>  4 BV      49.9  50.1  LUM   Unknown              0.0758      0.262        0.238 #>  5 HT1197  56.0  44.0  BSL   Epithelial Other     0.119       0.288        0.224 #>  6 HT1376  10.9  89.1  LUM   Epithelial Other     0.100       0.277        0.238 #>  7 J82     98.1   1.91 BSL   Mesenchymal          0.127       0.292        0.219 #>  8 RT112    0   100    LUM   Luminal Papilla…     0.173       0.380        0.294 #>  9 RT4      0   100    LUM   Luminal Papilla…     0.134       0.317        0.257 #> 10 RT4V6    0   100    LUM   Luminal Papilla…     0.143       0.207        0.165 #> # ℹ 20 more rows #> # ℹ 4 more variables: .pred_MS2a.2 <dbl>, .pred_MS2b.1 <dbl>, #> #   .pred_MS2b2.1 <dbl>, .pred_MS2b2.2 <dbl> plotting_data <- out |>   pivot_longer(cols = starts_with(\".pred\"))  plotting_data |>   ggplot(aes(cell, value, color = name)) +   geom_point() +   facet_grid(~clade, scales = \"free_x\", space = \"free_x\") table <- plotting_data |>   summarize(winner = name[which.max(value)], .by = c(cell, clade)) |>   mutate(     five = case_when(       winner %in% c(\".pred_MS1a\", \".pred_MS1b\") ~ \"Urobasal A\",       winner %in% c(\".pred_MS2a.1\", \".pred_MS2a.2\") ~ \"Genomically unstable\",       winner == \".pred_MS2b.1\" ~ \"Infiltrated\",       winner == \".pred_MS2b2.1\" ~ \"Uro-B\",       winner == \".pred_MS2b2.2\" ~ \"SCC-like\"     )   ) |>   relocate(cell, five, clade)  print(table, n = 30) #> # A tibble: 30 × 4 #>    cell   five                 clade             winner        #>    <chr>  <chr>                <fct>             <chr>         #>  1 1A6    SCC-like             Epithelial Other  .pred_MS2b2.2 #>  2 253JP  SCC-like             Unknown           .pred_MS2b2.2 #>  3 5637   SCC-like             Epithelial Other  .pred_MS2b2.2 #>  4 BV     Urobasal A           Unknown           .pred_MS1b    #>  5 HT1197 SCC-like             Epithelial Other  .pred_MS2b2.2 #>  6 HT1376 SCC-like             Epithelial Other  .pred_MS2b2.2 #>  7 J82    Urobasal A           Mesenchymal       .pred_MS1b    #>  8 RT112  Urobasal A           Luminal Papillary .pred_MS1b    #>  9 RT4    Urobasal A           Luminal Papillary .pred_MS1b    #> 10 RT4V6  Urobasal A           Luminal Papillary .pred_MS1b    #> 11 SCaBER SCC-like             Epithelial Other  .pred_MS2b2.2 #> 12 SW780  Urobasal A           Luminal Papillary .pred_MS1b    #> 13 T24    SCC-like             Mesenchymal       .pred_MS2b2.2 #> 14 TCCSup SCC-like             Mesenchymal       .pred_MS2b2.2 #> 15 UC10   SCC-like             Epithelial Other  .pred_MS2b2.2 #> 16 UC11   SCC-like             Mesenchymal       .pred_MS2b2.2 #> 17 UC12   Urobasal A           Mesenchymal       .pred_MS1b    #> 18 UC13   SCC-like             Mesenchymal       .pred_MS2b2.2 #> 19 UC14   Urobasal A           Luminal Papillary .pred_MS1b    #> 20 UC15   SCC-like             Epithelial Other  .pred_MS2b2.2 #> 21 UC16   SCC-like             Epithelial Other  .pred_MS2b2.2 #> 22 UC17   SCC-like             Luminal Papillary .pred_MS2b2.2 #> 23 UC18   SCC-like             Mesenchymal       .pred_MS2b2.2 #> 24 UC1    Urobasal A           Luminal Papillary .pred_MS1b    #> 25 UC3    SCC-like             Mesenchymal       .pred_MS2b2.2 #> 26 UC4    Urobasal A           Unknown           .pred_MS1b    #> 27 UC5    Urobasal A           Luminal Papillary .pred_MS1b    #> 28 UC6    Urobasal A           Luminal Papillary .pred_MS1b    #> 29 UC7    Urobasal A           Epithelial Other  .pred_MS1b    #> 30 UC9    Genomically unstable Epithelial Other  .pred_MS2a.1"},{"path":"https://kaiaragaki.github.io/reclanc/articles/using-reclanc.html","id":"introduction","dir":"Articles","previous_headings":"","what":"Introduction","title":"using-reclanc","text":"vignette provide brief introduction basic usage reclanc. ’re interested reclanc works, ’d recommend reading blog post wrote original paper Alan Dabney, created original ClaNC algorithm.","code":""},{"path":"https://kaiaragaki.github.io/reclanc/articles/using-reclanc.html","id":"fitting","dir":"Articles","previous_headings":"","what":"Fitting","title":"using-reclanc","text":"create new centroids existing expression data, use clanc function. reclanc provides synthetic expression data can work : data include 12 samples, 6 class “” 6 class “B”, 100 genes. reclanc able ingest variety common formats expression data. Objects like SummarizedExperiments ExpressionSets frequently used bioinformatic analyses arrange data samples columns genes rows. conflict expected formula input base R, predictors (genes) outcomes (classes) columns. reclanc eases friction expecting input common format, abstracting away wrangling aspect analysis. , two broad categories input - ‘wide data’ ‘tall data’.","code":"library(reclanc) library(SummarizedExperiment) library(parsnip) lapply(synthetic_expression, head) #> $expression #>        sample1  sample2  sample3  sample4  sample5  sample6  sample7  sample8 #> gene1 8.097529 7.119188 7.304400 7.554689 7.953206 7.714925 7.512700 8.597547 #> gene2 8.641837 9.400416 8.500865 8.878687 8.318438 8.728683 7.812591 7.638167 #> gene3 3.436236 4.317915 3.435193 3.515755 3.024976 4.762209 5.048956 2.006646 #> gene4 4.368008 5.212750 4.618249 4.201365 3.195294 4.707750 5.126769 6.178658 #> gene5 2.423974 3.563816 4.062362 2.163278 2.021435 2.813873 0.000000 4.652358 #> gene6 5.371205 5.919809 4.366915 4.805534 4.834856 5.622157 3.883531 3.593082 #>        sample9 sample10 sample11 sample12 #> gene1 6.475641 7.648858 8.637526 7.345038 #> gene2 8.110285 7.906104 7.424728 7.927039 #> gene3 2.739211 3.111668 3.161077 4.306611 #> gene4 5.170265 4.259578 5.872855 6.159023 #> gene5 1.532242 3.399823 3.691250 1.932937 #> gene6 4.246205 4.637316 3.575837 2.730452 #>  #> $classes #> [1] A A A A A A #> Levels: A B"},{"path":"https://kaiaragaki.github.io/reclanc/articles/using-reclanc.html","id":"wide-inputs","dir":"Articles","previous_headings":"Fitting","what":"Wide inputs","title":"using-reclanc","text":"Wide inputs require data predictors outcomes columns, together, single data.frame.","code":""},{"path":"https://kaiaragaki.github.io/reclanc/articles/using-reclanc.html","id":"formula","dir":"Articles","previous_headings":"Fitting > Wide inputs","what":"Formula","title":"using-reclanc","text":"","code":"form_data <- cbind(   class = synthetic_expression$classes,   as.data.frame(t(synthetic_expression$expression)) ) head(form_data[1:5]) #>         class    gene1    gene2    gene3    gene4 #> sample1     A 8.097529 8.641837 3.436236 4.368008 #> sample2     A 7.119188 9.400416 4.317915 5.212750 #> sample3     A 7.304400 8.500865 3.435193 4.618249 #> sample4     A 7.554689 8.878687 3.515755 4.201365 #> sample5     A 7.953206 8.318438 3.024976 3.195294 #> sample6     A 7.714925 8.728683 4.762209 4.707750 clanc(class ~ ., form_data, active = 5) #> <clanc>  #> $centroids #>    class   gene expression pooled_sd active prior #> 1      A gene12   7.514718 0.4779155      5   0.5 #> 2      A  gene2   8.744821 0.3147537      5   0.5 #> 3      A gene13   8.936462 0.3418472      5   0.5 #> 4      A gene21   6.584681 0.5279636      5   0.5 #> 5      A gene24   4.307301 0.7214700      5   0.5 #> 6      A gene74   4.028507 0.4940783      5   0.5 #> 7      A gene41   4.328516 0.6317005      5   0.5 #> 8      A gene95   6.873184 0.4462475      5   0.5 #> 9      A gene52   3.743798 0.5173769      5   0.5 #> 10     A gene66   7.008174 0.5883218      5   0.5 #> 11     B gene12   8.072284 0.4779155      5   0.5 #> 12     B gene13   9.938137 0.3418472      5   0.5 #> 13     B  gene2   8.273987 0.3147537      5   0.5 #> 14     B gene24   3.370467 0.7214700      5   0.5 #> 15     B gene21   5.789423 0.5279636      5   0.5 #> 16     B gene41   5.518354 0.6317005      5   0.5 #> 17     B gene74   3.226598 0.4940783      5   0.5 #> 18     B gene52   2.438579 0.5173769      5   0.5 #> 19     B gene95   6.288173 0.4462475      5   0.5 #> 20     B gene66   7.891588 0.5883218      5   0.5"},{"path":"https://kaiaragaki.github.io/reclanc/articles/using-reclanc.html","id":"recipe","dir":"Articles","previous_headings":"Fitting > Wide inputs","what":"recipe","title":"using-reclanc","text":"reclanc also supports tidymodels workflows:","code":"discrim_linear() |>   set_engine(\"clanc\", active = 5) |>   fit(class ~ ., data = form_data) #> parsnip model object #>  #> <clanc>  #> $centroids #>    class   gene expression pooled_sd active prior #> 1      A gene12   7.514718 0.4779155      5   0.5 #> 2      A  gene2   8.744821 0.3147537      5   0.5 #> 3      A gene13   8.936462 0.3418472      5   0.5 #> 4      A gene21   6.584681 0.5279636      5   0.5 #> 5      A gene24   4.307301 0.7214700      5   0.5 #> 6      A gene74   4.028507 0.4940783      5   0.5 #> 7      A gene41   4.328516 0.6317005      5   0.5 #> 8      A gene95   6.873184 0.4462475      5   0.5 #> 9      A gene52   3.743798 0.5173769      5   0.5 #> 10     A gene66   7.008174 0.5883218      5   0.5 #> 11     B gene12   8.072284 0.4779155      5   0.5 #> 12     B gene13   9.938137 0.3418472      5   0.5 #> 13     B  gene2   8.273987 0.3147537      5   0.5 #> 14     B gene24   3.370467 0.7214700      5   0.5 #> 15     B gene21   5.789423 0.5279636      5   0.5 #> 16     B gene41   5.518354 0.6317005      5   0.5 #> 17     B gene74   3.226598 0.4940783      5   0.5 #> 18     B gene52   2.438579 0.5173769      5   0.5 #> 19     B gene95   6.288173 0.4462475      5   0.5 #> 20     B gene66   7.891588 0.5883218      5   0.5"},{"path":"https://kaiaragaki.github.io/reclanc/articles/using-reclanc.html","id":"tall-inputs","dir":"Articles","previous_headings":"Fitting","what":"Tall inputs","title":"using-reclanc","text":"Tall inputs require genes rows samples columns","code":""},{"path":"https://kaiaragaki.github.io/reclanc/articles/using-reclanc.html","id":"data-framematrix","dir":"Articles","previous_headings":"Fitting > Tall inputs","what":"data.frame/matrix","title":"using-reclanc","text":"often convenient supply data.frame, particularly data-munging done. data.frame matrix inputs require expression genes column names sample IDs rownames, well factor vector classes:","code":"clanc(   synthetic_expression$expression,   classes = synthetic_expression$classes,   active = 5 ) #> <clanc>  #> $centroids #>    class   gene expression pooled_sd active prior #> 1      A gene12   7.514718 0.4779155      5   0.5 #> 2      A  gene2   8.744821 0.3147537      5   0.5 #> 3      A gene13   8.936462 0.3418472      5   0.5 #> 4      A gene21   6.584681 0.5279636      5   0.5 #> 5      A gene24   4.307301 0.7214700      5   0.5 #> 6      A gene74   4.028507 0.4940783      5   0.5 #> 7      A gene41   4.328516 0.6317005      5   0.5 #> 8      A gene95   6.873184 0.4462475      5   0.5 #> 9      A gene52   3.743798 0.5173769      5   0.5 #> 10     A gene66   7.008174 0.5883218      5   0.5 #> 11     B gene12   8.072284 0.4779155      5   0.5 #> 12     B gene13   9.938137 0.3418472      5   0.5 #> 13     B  gene2   8.273987 0.3147537      5   0.5 #> 14     B gene24   3.370467 0.7214700      5   0.5 #> 15     B gene21   5.789423 0.5279636      5   0.5 #> 16     B gene41   5.518354 0.6317005      5   0.5 #> 17     B gene74   3.226598 0.4940783      5   0.5 #> 18     B gene52   2.438579 0.5173769      5   0.5 #> 19     B gene95   6.288173 0.4462475      5   0.5 #> 20     B gene66   7.891588 0.5883218      5   0.5"},{"path":"https://kaiaragaki.github.io/reclanc/articles/using-reclanc.html","id":"summarizedexperiment","dir":"Articles","previous_headings":"Fitting > Tall inputs","what":"SummarizedExperiment","title":"using-reclanc","text":"common formats expression SummarizedExperiments ExpressionSets: can specify name colData (pData ExpressionSets) column contains classes classes argument:","code":"se <- SummarizedExperiment(   synthetic_expression$expression,   colData = DataFrame(class = synthetic_expression$classes) ) se #> class: SummarizedExperiment  #> dim: 100 12  #> metadata(0): #> assays(1): '' #> rownames(100): gene1 gene2 ... gene99 gene100 #> rowData names(0): #> colnames(12): sample1 sample2 ... sample11 sample12 #> colData names(1): class fit <- clanc(   se,   classes = \"class\",   active = 20,   assay = 1 # Index of assay - SummarizedExperiments only ) fit #> <clanc>  #> $centroids #>    class    gene expression pooled_sd active prior #> 1      A  gene11  2.2992343 1.2044848     20   0.5 #> 2      A   gene2  8.7448209 0.3147537     20   0.5 #> 3      A  gene13  8.9364621 0.3418472     20   0.5 #> 4      A  gene20  2.1925558 1.3104010     20   0.5 #> 5      A  gene10  4.9557850 0.8571716     20   0.5 #> 6      A  gene21  6.5846813 0.5279636     20   0.5 #> 7      A gene100  5.6455200 0.6175104     20   0.5 #> 8      A  gene22  6.1650079 0.4699756     20   0.5 #> 9      A  gene46  6.7344030 0.8233370     20   0.5 #> 10     A  gene24  4.3073008 0.7214700     20   0.5 #> 11     A  gene15  2.4254020 1.1910158     20   0.5 #> 12     A  gene25  5.0353875 0.7498139     20   0.5 #> 13     A  gene17  2.9424148 0.6628466     20   0.5 #> 14     A   gene4  4.3839026 0.7144711     20   0.5 #> 15     A  gene56  6.3441126 0.4078736     20   0.5 #> 16     A  gene41  4.3285163 0.6317005     20   0.5 #> 17     A  gene57  4.2237139 0.9531773     20   0.5 #> 18     A   gene7  5.5545202 0.7875124     20   0.5 #> 19     A  gene58  5.6162919 0.8161951     20   0.5 #> 20     A  gene12  7.5147181 0.4779155     20   0.5 #> 21     A   gene6  5.1534126 0.6194184     20   0.5 #> 22     A  gene51  6.6256136 0.7737520     20   0.5 #> 23     A  gene60  4.7434923 1.2945446     20   0.5 #> 24     A  gene52  3.7437977 0.5173769     20   0.5 #> 25     A  gene63  8.9293980 0.5635262     20   0.5 #> 26     A  gene53  4.3774614 0.8370528     20   0.5 #> 27     A  gene66  7.0081742 0.5883218     20   0.5 #> 28     A  gene83  3.6532038 0.8444393     20   0.5 #> 29     A  gene67  6.1384613 0.3677756     20   0.5 #> 30     A  gene85  5.2179679 0.5930857     20   0.5 #> 31     A  gene88  4.6008044 1.0603007     20   0.5 #> 32     A  gene70  1.3073340 1.1264747     20   0.5 #> 33     A  gene47  9.4528373 0.2030726     20   0.5 #> 34     A  gene90  0.9794695 1.3272423     20   0.5 #> 35     A  gene74  4.0285071 0.4940783     20   0.5 #> 36     A  gene94  7.7773183 0.5375914     20   0.5 #> 37     A  gene78  2.1763395 1.6805560     20   0.5 #> 38     A  gene95  6.8731844 0.4462475     20   0.5 #> 39     A  gene79  3.7138831 1.0587367     20   0.5 #> 40     A  gene98  4.5710407 0.6798799     20   0.5 #> 41     B  gene10  4.2378889 0.8571716     20   0.5 #> 42     B   gene2  8.2739866 0.3147537     20   0.5 #> 43     B gene100  5.0435040 0.6175104     20   0.5 #> 44     B  gene20  3.4781598 1.3104010     20   0.5 #> 45     B  gene46  7.0200767 0.8233370     20   0.5 #> 46     B  gene11  1.2780748 1.2044848     20   0.5 #> 47     B  gene12  8.0722841 0.4779155     20   0.5 #> 48     B  gene22  6.4609169 0.4699756     20   0.5 #> 49     B  gene51  5.8920005 0.7737520     20   0.5 #> 50     B  gene13  9.9381374 0.3418472     20   0.5 #> 51     B  gene15  1.6008569 1.1910158     20   0.5 #> 52     B  gene25  4.5015558 0.7498139     20   0.5 #> 53     B  gene17  2.5005839 0.6628466     20   0.5 #> 54     B   gene4  4.9225469 0.7144711     20   0.5 #> 55     B  gene56  6.1067832 0.4078736     20   0.5 #> 56     B  gene41  5.5183538 0.6317005     20   0.5 #> 57     B  gene57  3.1175271 0.9531773     20   0.5 #> 58     B   gene7  5.3367575 0.7875124     20   0.5 #> 59     B  gene21  5.7894231 0.5279636     20   0.5 #> 60     B  gene47  9.5903798 0.2030726     20   0.5 #> 61     B   gene6  4.4655748 0.6194184     20   0.5 #> 62     B  gene74  3.2265977 0.4940783     20   0.5 #> 63     B  gene24  3.3704670 0.7214700     20   0.5 #> 64     B  gene52  2.4385792 0.5173769     20   0.5 #> 65     B  gene63  8.3234317 0.5635262     20   0.5 #> 66     B  gene53  3.8479638 0.8370528     20   0.5 #> 67     B  gene66  7.8915875 0.5883218     20   0.5 #> 68     B  gene83  4.2757218 0.8444393     20   0.5 #> 69     B  gene67  6.0190764 0.3677756     20   0.5 #> 70     B  gene85  5.8877225 0.5930857     20   0.5 #> 71     B  gene79  4.1894417 1.0587367     20   0.5 #> 72     B  gene58  4.7194615 0.8161951     20   0.5 #> 73     B  gene88  5.5945405 1.0603007     20   0.5 #> 74     B  gene70  1.5987845 1.1264747     20   0.5 #> 75     B  gene90  1.4036889 1.3272423     20   0.5 #> 76     B  gene60  5.2336968 1.2945446     20   0.5 #> 77     B  gene78  1.6625207 1.6805560     20   0.5 #> 78     B  gene95  6.2881728 0.4462475     20   0.5 #> 79     B  gene98  4.1346296 0.6798799     20   0.5 #> 80     B  gene94  8.4222554 0.5375914     20   0.5"},{"path":"https://kaiaragaki.github.io/reclanc/articles/using-reclanc.html","id":"predicting","dir":"Articles","previous_headings":"","what":"Predicting","title":"using-reclanc","text":"fit can used predict classes new samples new data. new data can come form matrix, data.frame, SummarizedExperiment, ExpressionSet, expected input Using type = \"class\" predict classes using metric provided Alan Dabney original ClaNC paper. However, particularly comparing across datasets may transformed differently, may accurate use correlation based metric:","code":"predict(fit, new_data = se, type = \"class\") #> 40/40 (100%) genes in centroids found in data #> # A tibble: 12 × 1 #>    .pred_class #>    <fct>       #>  1 A           #>  2 A           #>  3 A           #>  4 A           #>  5 A           #>  6 A           #>  7 B           #>  8 B           #>  9 B           #> 10 B           #> 11 B           #> 12 B predict(fit, new_data = se, type = \"numeric\", method = \"spearman\") #> 40/40 (100%) genes in centroids found in data #> # A tibble: 12 × 2 #>    .pred_A .pred_B #>      <dbl>   <dbl> #>  1   0.901   0.811 #>  2   0.929   0.849 #>  3   0.932   0.840 #>  4   0.912   0.829 #>  5   0.862   0.770 #>  6   0.932   0.869 #>  7   0.776   0.904 #>  8   0.824   0.931 #>  9   0.828   0.924 #> 10   0.855   0.946 #> 11   0.805   0.915 #> 12   0.750   0.869"},{"path":"https://kaiaragaki.github.io/reclanc/authors.html","id":null,"dir":"","previous_headings":"","what":"Authors","title":"Authors and Citation","text":"Kai Aragaki. Author, maintainer. Alan Dabney. Author, copyright holder.           Original creator ClaNC","code":""},{"path":"https://kaiaragaki.github.io/reclanc/authors.html","id":"citation","dir":"","previous_headings":"","what":"Citation","title":"Authors and Citation","text":"Alan D (2005). “Classification microarrays nearest centroids.” Bioinformatics, 21(22), 4148-4154. doi:10.1093/bioinformatics/bti681.","code":"@Article{,   title = {Classification of microarrays to nearest centroids},   author = {Dabney Alan},   journal = {Bioinformatics},   year = {2005},   volume = {21},   number = {22},   pages = {4148-4154},   doi = {10.1093/bioinformatics/bti681}, }"},{"path":"https://kaiaragaki.github.io/reclanc/index.html","id":"reclanc","dir":"","previous_headings":"","what":"A Revival of the ClaNC Algorithm","title":"A Revival of the ClaNC Algorithm","text":"reclanc revival ClaNC (Classification microarrays nearest centroids), Alan R. Dabney. Since source lost (least knowledge), code comes heavy modification. reclanc nearest-centroid classifier expression data. tends little sensitive accurate similar models like PAM. Besides mere existence, reclanc differs slightly original ClaNC package ways: reclanc supports wide variety inputs (data.frame, matrix, formula, recipe, ExpressionSet, SummarizedExperiment) reclanc plays nicely tidymodels, offloads things like making folds rsample tuning tune (see vignette leverage tidymodels reclanc). Provides prediction method based correlation, rather distance - useful predicting classes data different sequencing modalities","code":""},{"path":"https://kaiaragaki.github.io/reclanc/index.html","id":"installation","dir":"","previous_headings":"","what":"Installation","title":"A Revival of the ClaNC Algorithm","text":"can install development version reclanc like :","code":"# install.packages(\"pak\") pak::pak(\"KaiAragaki/reclanc\")"},{"path":"https://kaiaragaki.github.io/reclanc/index.html","id":"how-to-use-it","dir":"","previous_headings":"","what":"How to use it","title":"A Revival of the ClaNC Algorithm","text":"information basic usage, see vignette. case study, well optimize active parameter, see vignette.","code":"library(reclanc) lapply(synthetic_expression, head) # dummy data #> $expression #>        sample1  sample2  sample3  sample4  sample5  sample6  sample7  sample8 #> gene1 8.097529 7.119188 7.304400 7.554689 7.953206 7.714925 7.512700 8.597547 #> gene2 8.641837 9.400416 8.500865 8.878687 8.318438 8.728683 7.812591 7.638167 #> gene3 3.436236 4.317915 3.435193 3.515755 3.024976 4.762209 5.048956 2.006646 #> gene4 4.368008 5.212750 4.618249 4.201365 3.195294 4.707750 5.126769 6.178658 #> gene5 2.423974 3.563816 4.062362 2.163278 2.021435 2.813873 0.000000 4.652358 #> gene6 5.371205 5.919809 4.366915 4.805534 4.834856 5.622157 3.883531 3.593082 #>        sample9 sample10 sample11 sample12 #> gene1 6.475641 7.648858 8.637526 7.345038 #> gene2 8.110285 7.906104 7.424728 7.927039 #> gene3 2.739211 3.111668 3.161077 4.306611 #> gene4 5.170265 4.259578 5.872855 6.159023 #> gene5 1.532242 3.399823 3.691250 1.932937 #> gene6 4.246205 4.637316 3.575837 2.730452 #>  #> $classes #> [1] A A A A A A #> Levels: A B centroids <- clanc(   synthetic_expression$expression,   classes = synthetic_expression$classes,   active = 5 ) centroids #> <clanc>  #> $centroids #>    class   gene expression pooled_sd active prior #> 1      A gene13   8.936462 0.3418472      5   0.5 #> 2      A gene21   7.379940 0.5279636      5   0.5 #> 3      A  gene2   8.744821 0.3147537      5   0.5 #> 4      A gene74   4.028507 0.4940783      5   0.5 #> 5      A gene41   4.328516 0.6317005      5   0.5 #> 6      A gene66   6.124761 0.5883218      5   0.5 #> 7      A gene24   4.307301 0.7214700      5   0.5 #> 8      A gene95   6.288173 0.4462475      5   0.5 #> 9      A gene94   7.777318 0.5375914      5   0.5 #> 10     A gene52   3.743798 0.5173769      5   0.5 #> 11     B gene13   9.938137 0.3418472      5   0.5 #> 12     B  gene2   8.273987 0.3147537      5   0.5 #> 13     B gene21   6.584681 0.5279636      5   0.5 #> 14     B gene41   5.518354 0.6317005      5   0.5 #> 15     B gene74   3.226598 0.4940783      5   0.5 #> 16     B gene24   3.370467 0.7214700      5   0.5 #> 17     B gene66   7.008174 0.5883218      5   0.5 #> 18     B gene94   8.422255 0.5375914      5   0.5 #> 19     B gene95   5.703161 0.4462475      5   0.5 #> 20     B gene52   2.438579 0.5173769      5   0.5"},{"path":"https://kaiaragaki.github.io/reclanc/index.html","id":"how-it-works","dir":"","previous_headings":"","what":"How it works","title":"A Revival of the ClaNC Algorithm","text":"can find gentle introduction reclanc works -depth statistically rigorous definition algorithm works original paper.","code":""},{"path":"https://kaiaragaki.github.io/reclanc/index.html","id":"citation","dir":"","previous_headings":"","what":"Citation","title":"A Revival of the ClaNC Algorithm","text":"Citation original ClaNC paper: Alan R. Dabney, Classification microarrays nearest centroids, Bioinformatics, Volume 21, Issue 22, November 2005, Pages 4148–4154, https://doi.org/10.1093/bioinformatics/bti681","code":""},{"path":"https://kaiaragaki.github.io/reclanc/reference/clanc.html","id":null,"dir":"Reference","previous_headings":"","what":"Calculate centroids from expression data with ClaNC — clanc","title":"Calculate centroids from expression data with ClaNC — clanc","text":"Calculate centroids expression data ClaNC","code":""},{"path":"https://kaiaragaki.github.io/reclanc/reference/clanc.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Calculate centroids from expression data with ClaNC — clanc","text":"","code":"clanc(x, ...)  # Default S3 method clanc(x, ...)  # S3 method for class 'data.frame' clanc(x, classes, active, priors = \"equal\", ...)  # S3 method for class 'matrix' clanc(x, classes, active, priors = \"equal\", ...)  # S3 method for class 'SummarizedExperiment' clanc(x, classes, active, priors = \"equal\", assay = 1, ...)  # S3 method for class 'ExpressionSet' clanc(x, classes, active, priors = \"equal\", ...)  # S3 method for class 'formula' clanc(formula, data, active, priors = \"equal\", ...)  # S3 method for class 'recipe' clanc(x, data, active, priors = \"equal\", ...)"},{"path":"https://kaiaragaki.github.io/reclanc/reference/clanc.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Calculate centroids from expression data with ClaNC — clanc","text":"x Depending context: data frame expression. matrix expression. recipe specifying set preprocessing steps created recipes::recipe(). ExpressionSet. SummarizedExperiment assay containing expression. Expression library-size corrected, scaled. supplying data frame, matrix, ExpressionSet, SummarizedExperiment, rows represent genes, columns represent samples (standard expression data). column names sample IDs, row names gene IDs. recipe provided, data genes columns (match formula provided recipe.) ... currently used, required extensibility. classes x data frame matrix, class contains class labels form either: data frame 1 factor column factor vector. x ExpressionSet SummarizedExperiment, class name column pData(x) colData(x) contains classes factor. active Either single number numeric vector equal length number unique class labels. Represents number class-specific genes selected centroid. Note different numbers genes can selected class. See details. x ExpressionSet SummarizedExperiment, active can additionally name column pData(x) colData(x) contains numeric vector priors Can take variety values: \"equal\" - class equal prior \"class\" - class prior equal frequency training set numeric vector length equal number classes x ExpressionSet SummarizedExperiment, active can additionally name column pData(x) colData(x) contains numeric vector assay SummarizedExperiment used, index name assay formula formula specifying classes left-hand side, predictor terms right-hand side. data recipe formula used, data specified : data frame containing expression classes, columns genes class, rows samples.","code":""},{"path":"https://kaiaragaki.github.io/reclanc/reference/clanc.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Calculate centroids from expression data with ClaNC — clanc","text":"clanc object.","code":""},{"path":"https://kaiaragaki.github.io/reclanc/reference/clanc.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Calculate centroids from expression data with ClaNC — clanc","text":"original description ClaNC can found active sets number class-specific genes, centroid number genes. explain way example, active = 5 3 classes, centroid 15 genes, 5 genes particular given class. genes 'active' class, values mean class. genes active given class, values overall expression given gene across classes.","code":""},{"path":"https://kaiaragaki.github.io/reclanc/reference/clanc.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Calculate centroids from expression data with ClaNC — clanc","text":"","code":"expression_matrix <- synthetic_expression$expression head(expression_matrix) #>        sample1  sample2  sample3  sample4  sample5  sample6  sample7  sample8 #> gene1 8.097529 7.119188 7.304400 7.554689 7.953206 7.714925 7.512700 8.597547 #> gene2 8.641837 9.400416 8.500865 8.878687 8.318438 8.728683 7.812591 7.638167 #> gene3 3.436236 4.317915 3.435193 3.515755 3.024976 4.762209 5.048956 2.006646 #> gene4 4.368008 5.212750 4.618249 4.201365 3.195294 4.707750 5.126769 6.178658 #> gene5 2.423974 3.563816 4.062362 2.163278 2.021435 2.813873 0.000000 4.652358 #> gene6 5.371205 5.919809 4.366915 4.805534 4.834856 5.622157 3.883531 3.593082 #>        sample9 sample10 sample11 sample12 #> gene1 6.475641 7.648858 8.637526 7.345038 #> gene2 8.110285 7.906104 7.424728 7.927039 #> gene3 2.739211 3.111668 3.161077 4.306611 #> gene4 5.170265 4.259578 5.872855 6.159023 #> gene5 1.532242 3.399823 3.691250 1.932937 #> gene6 4.246205 4.637316 3.575837 2.730452 classes <- synthetic_expression$classes classes #>  [1] A A A A A A B B B B B B #> Levels: A B  # data.frame/tibble/matrix interface:  clanc(expression_matrix, classes = classes, active = 5, priors = \"equal\") #> <clanc>  #> $centroids #>    class   gene expression pooled_sd active prior #> 1      A gene12   7.514718 0.4779155      5   0.5 #> 2      A  gene2   8.744821 0.3147537      5   0.5 #> 3      A gene13   8.936462 0.3418472      5   0.5 #> 4      A gene21   6.584681 0.5279636      5   0.5 #> 5      A gene24   4.307301 0.7214700      5   0.5 #> 6      A gene74   4.028507 0.4940783      5   0.5 #> 7      A gene41   4.328516 0.6317005      5   0.5 #> 8      A gene95   6.873184 0.4462475      5   0.5 #> 9      A gene52   3.743798 0.5173769      5   0.5 #> 10     A gene66   7.008174 0.5883218      5   0.5 #> 11     B gene12   8.072284 0.4779155      5   0.5 #> 12     B gene13   9.938137 0.3418472      5   0.5 #> 13     B  gene2   8.273987 0.3147537      5   0.5 #> 14     B gene24   3.370467 0.7214700      5   0.5 #> 15     B gene21   5.789423 0.5279636      5   0.5 #> 16     B gene41   5.518354 0.6317005      5   0.5 #> 17     B gene74   3.226598 0.4940783      5   0.5 #> 18     B gene52   2.438579 0.5173769      5   0.5 #> 19     B gene95   6.288173 0.4462475      5   0.5 #> 20     B gene66   7.891588 0.5883218      5   0.5 #>   # Formula interface:  # Data must have class included as a column # Genes must be *columns* and samples must be *rows* # Hence the data transposition. for_formula <- data.frame(class = classes, t(expression_matrix))  clanc(class ~ ., for_formula, active = 5, priors = \"equal\") #> <clanc>  #> $centroids #>    class   gene expression pooled_sd active prior #> 1      A gene12   7.514718 0.4779155      5   0.5 #> 2      A  gene2   8.744821 0.3147537      5   0.5 #> 3      A gene13   8.936462 0.3418472      5   0.5 #> 4      A gene21   6.584681 0.5279636      5   0.5 #> 5      A gene24   4.307301 0.7214700      5   0.5 #> 6      A gene74   4.028507 0.4940783      5   0.5 #> 7      A gene41   4.328516 0.6317005      5   0.5 #> 8      A gene95   6.873184 0.4462475      5   0.5 #> 9      A gene52   3.743798 0.5173769      5   0.5 #> 10     A gene66   7.008174 0.5883218      5   0.5 #> 11     B gene12   8.072284 0.4779155      5   0.5 #> 12     B gene13   9.938137 0.3418472      5   0.5 #> 13     B  gene2   8.273987 0.3147537      5   0.5 #> 14     B gene24   3.370467 0.7214700      5   0.5 #> 15     B gene21   5.789423 0.5279636      5   0.5 #> 16     B gene41   5.518354 0.6317005      5   0.5 #> 17     B gene74   3.226598 0.4940783      5   0.5 #> 18     B gene52   2.438579 0.5173769      5   0.5 #> 19     B gene95   6.288173 0.4462475      5   0.5 #> 20     B gene66   7.891588 0.5883218      5   0.5 #>    # Recipes interface:  rec <- recipes::recipe(class ~ ., data = for_formula)  clanc(rec, for_formula, active = 5, priors = \"equal\") #> <clanc>  #> $centroids #>    class   gene expression pooled_sd active prior #> 1      A gene12   7.514718 0.4779155      5   0.5 #> 2      A  gene2   8.744821 0.3147537      5   0.5 #> 3      A gene13   8.936462 0.3418472      5   0.5 #> 4      A gene21   6.584681 0.5279636      5   0.5 #> 5      A gene24   4.307301 0.7214700      5   0.5 #> 6      A gene74   4.028507 0.4940783      5   0.5 #> 7      A gene41   4.328516 0.6317005      5   0.5 #> 8      A gene95   6.873184 0.4462475      5   0.5 #> 9      A gene52   3.743798 0.5173769      5   0.5 #> 10     A gene66   7.008174 0.5883218      5   0.5 #> 11     B gene12   8.072284 0.4779155      5   0.5 #> 12     B gene13   9.938137 0.3418472      5   0.5 #> 13     B  gene2   8.273987 0.3147537      5   0.5 #> 14     B gene24   3.370467 0.7214700      5   0.5 #> 15     B gene21   5.789423 0.5279636      5   0.5 #> 16     B gene41   5.518354 0.6317005      5   0.5 #> 17     B gene74   3.226598 0.4940783      5   0.5 #> 18     B gene52   2.438579 0.5173769      5   0.5 #> 19     B gene95   6.288173 0.4462475      5   0.5 #> 20     B gene66   7.891588 0.5883218      5   0.5 #>   # SummarizedExperiment interface: se <- SummarizedExperiment::SummarizedExperiment(   expression_matrix,   colData = data.frame(     class = classes,     active = 5,     prior = c(0.5, 0.5)   ) )  clanc(se, classes = \"class\", active = \"active\", priors = \"equal\") #> <clanc>  #> $centroids #>    class   gene expression pooled_sd active prior #> 1      A gene12   7.514718 0.4779155      5   0.5 #> 2      A  gene2   8.744821 0.3147537      5   0.5 #> 3      A gene13   8.936462 0.3418472      5   0.5 #> 4      A gene21   6.584681 0.5279636      5   0.5 #> 5      A gene24   4.307301 0.7214700      5   0.5 #> 6      A gene74   4.028507 0.4940783      5   0.5 #> 7      A gene41   4.328516 0.6317005      5   0.5 #> 8      A gene95   6.873184 0.4462475      5   0.5 #> 9      A gene52   3.743798 0.5173769      5   0.5 #> 10     A gene66   7.008174 0.5883218      5   0.5 #> 11     B gene12   8.072284 0.4779155      5   0.5 #> 12     B gene13   9.938137 0.3418472      5   0.5 #> 13     B  gene2   8.273987 0.3147537      5   0.5 #> 14     B gene24   3.370467 0.7214700      5   0.5 #> 15     B gene21   5.789423 0.5279636      5   0.5 #> 16     B gene41   5.518354 0.6317005      5   0.5 #> 17     B gene74   3.226598 0.4940783      5   0.5 #> 18     B gene52   2.438579 0.5173769      5   0.5 #> 19     B gene95   6.288173 0.4462475      5   0.5 #> 20     B gene66   7.891588 0.5883218      5   0.5 #>   # ExpressionSet interface: adf <- data.frame(   row.names = colnames(expression_matrix),   class = classes ) |>   Biobase::AnnotatedDataFrame()  es <- Biobase::ExpressionSet(expression_matrix, adf) clanc(es, classes = \"class\", active = 5, priors = 0.5) #> <clanc>  #> $centroids #>    class   gene expression pooled_sd active prior #> 1      A gene12   7.514718 0.4779155      5   0.5 #> 2      A  gene2   8.744821 0.3147537      5   0.5 #> 3      A gene13   8.936462 0.3418472      5   0.5 #> 4      A gene21   6.584681 0.5279636      5   0.5 #> 5      A gene24   4.307301 0.7214700      5   0.5 #> 6      A gene74   4.028507 0.4940783      5   0.5 #> 7      A gene41   4.328516 0.6317005      5   0.5 #> 8      A gene95   6.873184 0.4462475      5   0.5 #> 9      A gene52   3.743798 0.5173769      5   0.5 #> 10     A gene66   7.008174 0.5883218      5   0.5 #> 11     B gene12   8.072284 0.4779155      5   0.5 #> 12     B gene13   9.938137 0.3418472      5   0.5 #> 13     B  gene2   8.273987 0.3147537      5   0.5 #> 14     B gene24   3.370467 0.7214700      5   0.5 #> 15     B gene21   5.789423 0.5279636      5   0.5 #> 16     B gene41   5.518354 0.6317005      5   0.5 #> 17     B gene74   3.226598 0.4940783      5   0.5 #> 18     B gene52   2.438579 0.5173769      5   0.5 #> 19     B gene95   6.288173 0.4462475      5   0.5 #> 20     B gene66   7.891588 0.5883218      5   0.5 #>"},{"path":"https://kaiaragaki.github.io/reclanc/reference/predict.clanc.html","id":null,"dir":"Reference","previous_headings":"","what":"Predict from a clanc — predict.clanc","title":"Predict from a clanc — predict.clanc","text":"Predict clanc","code":""},{"path":"https://kaiaragaki.github.io/reclanc/reference/predict.clanc.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Predict from a clanc — predict.clanc","text":"","code":"# S3 method for class 'clanc' predict(object, new_data, type, assay = NULL, format = c(\"wide\", \"tall\"), ...)"},{"path":"https://kaiaragaki.github.io/reclanc/reference/predict.clanc.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Predict from a clanc — predict.clanc","text":"object clanc object. new_data data frame matrix new predictors. type single character. type predictions generate. Valid options : \"numeric\" numeric predictions. assay object inherits SummarizedExperiment, index assay. format Character. data \"wide\" (default), genes columns, \"tall\", genes rows? ... used, required extensibility. method type numeric, method correlation","code":""},{"path":"https://kaiaragaki.github.io/reclanc/reference/predict.clanc.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Predict from a clanc — predict.clanc","text":"tibble predictions. number rows tibble guaranteed number rows new_data.","code":""},{"path":"https://kaiaragaki.github.io/reclanc/reference/synthetic_expression.html","id":null,"dir":"Reference","previous_headings":"","what":"Synthetic Expression of Two Distinct Classes — synthetic_expression","title":"Synthetic Expression of Two Distinct Classes — synthetic_expression","text":"Synthetic Expression Two Distinct Classes","code":""},{"path":"https://kaiaragaki.github.io/reclanc/reference/synthetic_expression.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Synthetic Expression of Two Distinct Classes — synthetic_expression","text":"","code":"synthetic_expression"},{"path":[]},{"path":"https://kaiaragaki.github.io/reclanc/reference/synthetic_expression.html","id":"synthetic-expression","dir":"Reference","previous_headings":"","what":"synthetic_expression","title":"Synthetic Expression of Two Distinct Classes — synthetic_expression","text":"list containing two items: expression Normalized log expression 12 samples across 100 genes classes factor vector classes 12 samples","code":""}]