Skip to content

Commit

Permalink
Merge pull request #14 from infinity-a11y/version_1.3.1
Browse files Browse the repository at this point in the history
Version 1.4.0 Pull Request
  • Loading branch information
infinity-a11y authored Jun 21, 2024
2 parents 31cc854 + 40dafd0 commit ffcb4e8
Show file tree
Hide file tree
Showing 22 changed files with 1,362 additions and 1,078 deletions.
1,555 changes: 948 additions & 607 deletions App.R

Large diffs are not rendered by default.

44 changes: 23 additions & 21 deletions PhyloTrace.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@ channels:
- defaults
dependencies:
- r-base=4.3.2
- r-remotes=2.5.0
- kma=1.4.14
- parallel=20240522
- pblat=2.5.1
- r-bh=1.81.0-1
- r-biocmanager=1.30.22
Expand Down Expand Up @@ -33,11 +35,12 @@ dependencies:
- r-bit64=4.0.5
- r-blob=1.2.4
- r-broom=1.0.5
- r-bslib=0.5.1
- r-cachem=1.0.8
- r-bslib=0.7.0
- r-bsicons=0.1.1
- r-cachem=1.1.0
- r-callr=3.7.3
- r-cellranger=1.1.0
- r-cli=3.6.1
- r-cli=3.6.2
- r-clipr=0.8.0
- r-coda=0.19_4
- r-colorspace=2.1_0
Expand All @@ -49,19 +52,19 @@ dependencies:
- r-dashboardthemes=1.1.6
- r-data.table=1.14.8
- r-dbplyr=2.3.4
- r-digest=0.6.33
- r-digest=0.6.35
- r-downloader=0.4
- r-dplyr=1.1.4
- r-dtplyr=1.3.1
- r-ellipsis=0.3.2
- r-evaluate=0.22
- r-evaluate=0.24
- r-fansi=1.0.5
- r-farver=2.1.1
- r-fastmap=1.1.1
- r-fastmap=1.2.0
- r-fastmatch=1.1_4
- r-fontawesome=0.5.2
- r-forcats=1.0.0
- r-fs=1.6.3
- r-fs=1.6.4
- r-gargle=1.5.2
- r-generics=0.1.3
- r-ggfun=0.1.3
Expand All @@ -70,30 +73,30 @@ dependencies:
- r-ggplot2=3.4.4
- r-ggplotify=0.1.2
- r-ggrepel=0.9.4
- r-glue=1.6.2
- r-glue=1.7.0
- r-googledrive=2.1.1
- r-googlesheets4=1.1.1
- r-gridGraphics=0.5_1
- r-gtable=0.3.4
- r-haven=2.5.3
- r-highr=0.10
- r-highr=0.11
- r-hms=1.1.3
- r-htmltools=0.5.6.1
- r-htmlwidgets=1.6.2
- r-htmltools=0.5.8.1
- r-htmlwidgets=1.6.4
- r-httpuv=1.6.11
- r-httr=1.4.7
- r-ids=1.0.1
- r-igraph=1.5.1
- r-isoband=0.2.7
- r-jquerylib=0.1.4
- r-jsonlite=1.8.7
- r-jsonlite=1.8.8
- r-kableExtra=1.3.4
- r-knitr=1.44
- r-knitr=1.47
- r-labeling=0.4.3
- r-later=1.3.1
- r-lattice=0.22_5
- r-lazyeval=0.2.2
- r-lifecycle=1.0.3
- r-lifecycle=1.0.4
- r-lubridate=1.9.3
- r-magrittr=2.0.3
- r-memoise=2.0.1
Expand Down Expand Up @@ -126,11 +129,11 @@ dependencies:
- r-rematch2=2.1.2
- r-reprex=2.0.2
- r-rhandsontable=0.3.8
- r-rlang=1.1.1
- r-rmarkdown=2.25
- r-rlang=1.1.4
- r-rmarkdown=2.27
- r-rstudioapi=0.15.0
- r-rvest=1.0.3
- r-sass=0.4.7
- r-sass=0.4.9
- r-scales=1.2.1
- r-selectr=0.4_2
- r-shiny=1.7.5
Expand All @@ -153,20 +156,19 @@ dependencies:
- r-tidytree=0.4.5
- r-tidyverse=2.0.0
- r-timechange=0.2.0
- r-tinytex=0.48
- r-tinytex=0.51
- r-tzdb=0.4.0
- r-utf8=1.2.3
- r-uuid=1.1_1
- r-vctrs=0.6.4
- r-viridis=0.6.5
- r-viridisLite=0.4.2
- r-visNetwork=2.1.2
- r-vroom=1.6.4
- r-webshot=0.5.5
- r-withr=2.5.1
- r-xfun=0.40
- r-xfun=0.45
- r-xml2=1.3.5
- r-xtable=1.8_4
- r-yaml=2.3.7
- r-yaml=2.3.8
- r-yulab.utils=0.1.0
- r-zoo=1.8_12
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -139,10 +139,11 @@ bash install_phylotrace.sh
### 2.4 Uninstall
To uninstall PhyloTrace from your system, remove the application directory and run the following commands to remove the
desktop launcher:
desktop launcher and the PhyloTrace conda environment:
```bash
rm $HOME/.local/share/applications/PhyloTrace.desktop
rm $HOME/.local/share/icons/hicolor/scalable/apps/PhyloTrace.png
conda remove -n PhyloTrace --all -y
```

### 2.5 Troubleshooting
Expand Down
64 changes: 15 additions & 49 deletions execute/automatic_typing.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
meta_info <- readRDS("meta_info.rds")
db_path <- readRDS("multi_typing_df.rds")[, "db_path"]
assembly_folder <- dir(paste0(getwd(), "/selected_genomes"), full.names = TRUE)
assembly <- assembly_folder[grep(tail(dir(paste0(getwd(), "/kma_multi/results")), n = 1), assembly_folder)]
assembly <- assembly_folder[which(commandArgs(trailingOnly = TRUE)[1] == basename(assembly_folder))]
results_folder <- dir(paste0(meta_info$db_directory, "/execute/blat_multi/results"), full.names = TRUE)

source("variant_validation.R")

Expand All @@ -20,6 +21,11 @@ column_classes <- function(df) {
})
}

# Append one timestamped entry to a log file.
#
# log_file: path of the file the entry is appended to.
# message:  text written after the timestamp.
#
# The timestamp, a "-" and the message are joined by cat()'s default
# single-space separator, followed by a newline.
log_message <- function(log_file, message) {
  stamp <- format(Sys.time(), "%Y-%m-%d %H:%M:%S")
  cat(stamp, "-", message, "\n", file = log_file, append = TRUE)
}

# Define start and stop codons
start_codons <- c("ATG", "GTG", "TTG")
stop_codons <- c("TAA", "TAG", "TGA")
Expand All @@ -31,7 +37,7 @@ allele_folder <- list.files(paste0(db_path, "/", gsub(" ", "_", meta_info$cgmlst
template <- readLines(assembly)

# List all .psl result files from alignment with BLAT
psl_files <- list.files(tail(dir(paste0(meta_info$db_directory, "/execute/kma_multi/results"), full.names = TRUE), n = 1), pattern = "\\.psl$", full.names = TRUE)
psl_files <- list.files(results_folder[which(sub("\\.(fasta|fna|fa)$", "", basename(assembly)) == basename(results_folder))], pattern = "\\.psl$", full.names = TRUE)

# Initialize an empty vector to store the results
allele_vector <- integer(length(psl_files))
Expand Down Expand Up @@ -299,54 +305,14 @@ if(sum(unname(base::sapply(psl_files, file.size)) <= 427) / length(psl_files) <=
# Save new Entry in Typing Database
saveRDS(Database, paste0(db_path, "/", gsub(" ", "_", meta_info$cgmlst_typing), "/Typing.rds"))

multi_user_fb <- paste0(
"#!/bin/bash\n",
'log_file=', shQuote(paste0(getwd(), "/execute/script_log.txt")), '\n',
'# Function to log messages to the file', '\n',
'log_message() {', '\n',
' echo "$(date +"%Y-%m-%d %H:%M:%S") - $1" >> "$log_file"', '\n',
'}', '\n',
'log_message "Successful typing of "', shQuote(sub("\\.(fasta|fna|fa)$", "", basename(assembly)))
)

# Specify the path to save the script
multi_user_fb_path <- paste0(getwd(), "/execute/multi_user_fb.sh")

# Write the script to a file
cat(multi_user_fb, file = multi_user_fb_path)

# Make the script executable
system(paste("chmod +x", multi_user_fb_path))

# Execute the script
system(paste(multi_user_fb_path), wait = FALSE)
# Logging successes
log_message(log_file = paste0(getwd(), "/execute/script_log.txt"),
message = paste0("Successful typing of ", sub("\\.(fasta|fna|fa)$", "", basename(assembly))))

} else {

failures <- sum(unname(base::sapply(psl_files, file.size)) <= 427) / length(psl_files) * 100

multi_user_fb <- paste0(
"#!/bin/bash\n",
'log_file=', shQuote(paste0(getwd(), "/execute/script_log.txt")), '\n',
'# Function to log messages to the file', '\n',
'log_message() {', '\n',
' echo "$(date +"%Y-%m-%d %H:%M:%S") - $1" >> "$log_file"', '\n',
'}', '\n',
'log_message ', shQuote(paste0("Assembly typing failed for ", sub("\\.(fasta|fna|fa)$", "", basename(assembly)))), '\n',
shQuote(paste0(failures, "% of loci not typed."))
)

# Specify the path to save the script
multi_user_fb_path <- paste0(getwd(), "/execute/multi_user_fb.sh")

# Write the script to a file
cat(multi_user_fb, file = multi_user_fb_path)

# Make the script executable
system(paste("chmod +x", multi_user_fb_path))

# Execute the script
system(paste(multi_user_fb_path), wait = FALSE)
# Logging failures
log_message(log_file = paste0(getwd(), "/execute/script_log.txt"),
message = paste0("Assembly typing failed for ",
sub("\\.(fasta|fna|fa)$", "", basename(assembly))))
}


37 changes: 16 additions & 21 deletions execute/kma_multi.sh → execute/blat_multi.sh
Original file line number Diff line number Diff line change
Expand Up @@ -12,16 +12,17 @@ genome_folder=$(Rscript -e "cat(readRDS('multi_typing_df.rds')[,'genome_folder']
genome_names=$(Rscript -e "cat(readRDS('multi_typing_df.rds')[,'genome_names'])")
alleles=$(Rscript -e "cat(readRDS('multi_typing_df.rds')[,'alleles'])")

# Directory name
mkdir $base_path/execute/kma_multi
results="$base_path/execute/kma_multi/results"
# Remove the existing multi directory
if [ -d "$base_path/execute/blat_multi" ]; then
rm -r "$base_path/execute/blat_multi"
fi
mkdir "$base_path/execute/blat_multi"

# Remove the existing directory (if it exists)
# Remove the existing results directory
results="$base_path/execute/blat_multi/results"
if [ -d "$results" ]; then
rm -r "$results"
fi

# Create a new directory
mkdir "$results"

selected_genomes="$base_path/execute/selected_genomes"
Expand Down Expand Up @@ -54,9 +55,9 @@ for file in "${file_names[@]}"; do
done

#INDEXING GENOME AS DATABASE
kma_database="$base_path/execute/kma_multi/$scheme"
blat_database="$base_path/execute/blat_multi/$scheme"

#RUNNING KMA Loop
#RUNNING blat Loop
genome_filename_noext=""

#Indexing Loop
Expand All @@ -69,21 +70,15 @@ for genome in "$selected_genomes"/*; do
genome_filename=$(basename "$genome")
genome_filename_noext="${genome_filename%.*}"
log_message "Processing $genome_filename"
kma index -i "$genome" -o "$kma_database"
fi
mkdir "$results/$genome_filename_noext"

#Running Loop
for query_file in "$alleles"/*.{fasta,fa,fna}; do
if [ -f "$query_file" ]; then
query_filename=$(basename "$query_file")
query_filename_noext="${query_filename%.*}"
output_file="$results/$genome_filename_noext/$query_filename_noext"
#kma -i "$query_file" -o "$output_file" -t_db "$kma_database" -nc -status
pblat $genome "$query_file" "$output_file.psl"
fi
done

result_folder="$results/$genome_filename_noext"

# Run parallelized BLAT
find "$alleles" -type f \( -name "*.fasta" -o -name "*.fa" -o -name "*.fna" \) | parallel pblat $genome {} "$result_folder/{/.}.psl"

log_message "Attaching $genome_filename"
Rscript "$base_path/execute/automatic_typing.R"
Rscript "$base_path/execute/automatic_typing.R" "$genome_filename"
done
log_message "Multi Typing finalized."
50 changes: 50 additions & 0 deletions execute/blat_run.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
#!/bin/bash
#
# Single-assembly typing driver: activates the PhyloTrace conda environment,
# recreates execute/blat_single, aligns every allele file against the
# assembly with pblat (fanned out by GNU parallel), then runs
# single_typing.R. Progress markers are written to execute/progress.txt.

# Abort if the execute directory is missing; the relative path to
# single_typing_df.rds below is resolved from there.
cd execute || exit 1
source ~/miniconda3/etc/profile.d/conda.sh
conda activate PhyloTrace
unset R_HOME

# Set base path
base_path=$(Rscript -e "cat(readRDS('single_typing_df.rds')[,'wd'])")

# Without a base path every path below would collapse to "/execute/...",
# so stop before anything is removed or created.
if [ -z "$base_path" ]; then
    echo "blat_run.sh: could not read 'wd' from single_typing_df.rds" >&2
    exit 1
fi

# reset progress
echo 0 > "$base_path/execute/progress.txt"

# Get variables
# NOTE(review): scheme is not referenced again in this script - confirm
# whether it is still needed.
scheme=$(Rscript -e "cat(readRDS('single_typing_df.rds')[,'scheme'])")
alleles=$(Rscript -e "cat(readRDS('single_typing_df.rds')[,'alleles'])")

# Start from an empty working directory. The results directory lives inside
# blat_single, so removing blat_single also clears any previous results.
blat_single="$base_path/execute/blat_single"
results="$blat_single/results"
rm -rf "$blat_single"
mkdir -p "$results"

# Check assembly file and save in the execute folder
Rscript "$base_path/execute/check_duplicate.R"
genome="$blat_single/assembly.fasta"

# Run parallelized BLAT
# NOTE(review): `parallel --citation` prints GNU parallel's citation notice
# and may wait for a confirmation on stdin - confirm this is intended for
# unattended runs.
parallel --citation
# {} is each allele file found by find; {/.} is its basename without
# extension, giving one .psl file per allele file in the results directory.
# The genome path is quoted so the calling shell does not word-split it.
find "$alleles" -type f \( -name "*.fasta" -o -name "*.fa" -o -name "*.fna" \) | parallel pblat "$genome" {} "$results/{/.}.psl"

# Start appending results
echo 888888 >> "$base_path/execute/progress.txt"
Rscript "$base_path/execute/single_typing.R"

# Single typing finalized
echo 999999 >> "$base_path/execute/progress.txt"
8 changes: 3 additions & 5 deletions execute/check_duplicate.R
Original file line number Diff line number Diff line change
@@ -1,12 +1,10 @@
library(stringr)

typing_meta <- readRDS(paste0(getwd(), "/single_typing_df.rds"))

assembly <- typing_meta$genome

lines <- readLines(assembly)

names <- str_extract(lines[seq(1, length(lines), by = 3)], "^[^\\s]+")
names <- stringr::str_extract(lines[seq(1, length(lines), by = 3)], "^[^\\s]+")

# Test if there are duplicates
if(length(names) != length(unique(names))){
Expand All @@ -23,7 +21,7 @@ if(length(names) != length(unique(names))){
}

# save the new assembly to working directory
writeLines(lines, paste0(getwd(), "/kma_single/assembly.fasta"))
writeLines(lines, paste0(getwd(), "/blat_single/assembly.fasta"))
} else {
writeLines(lines, paste0(getwd(), "/kma_single/assembly.fasta"))
writeLines(lines, paste0(getwd(), "/blat_single/assembly.fasta"))
}
4 changes: 1 addition & 3 deletions execute/check_duplicate_multi.R
Original file line number Diff line number Diff line change
@@ -1,13 +1,11 @@
library(stringr)

file_names <- list.files(paste0(getwd(), "/selected_genomes"), full.names = T)

# load selected assemblies
assemblies <- lapply(list.files(paste0(getwd(), "/selected_genomes"), full.names = T), readLines)

# loop through every assembly
for(i in 1:length(assemblies)){
names <- str_extract(assemblies[[i]][seq(1, length(assemblies[[i]]), by = 3)], "^[^\\s]+")
names <- stringr::str_extract(assemblies[[i]][seq(1, length(assemblies[[i]]), by = 3)], "^[^\\s]+")

# Test if there are duplicates
if(length(names) != length(unique(names))){
Expand Down
9 changes: 0 additions & 9 deletions execute/delete_typing.sh

This file was deleted.

Loading

0 comments on commit ffcb4e8

Please sign in to comment.