Skip to content

Commit

Permalink
Merge pull request #17 from infinity-a11y/version_1.4.1
Browse files Browse the repository at this point in the history
Version 1.4.1
  • Loading branch information
infinity-a11y authored Jul 14, 2024
2 parents 1f8aafe + 2f0aede commit 78921d2
Show file tree
Hide file tree
Showing 23 changed files with 7,009 additions and 4,380 deletions.
8,654 changes: 5,202 additions & 3,452 deletions App.R

Large diffs are not rendered by default.

10 changes: 6 additions & 4 deletions PhyloTrace.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,10 @@ dependencies:
- r-bh=1.81.0-1
- r-biocmanager=1.30.22
- bioconductor-biostrings=2.70.1
- bioconductor-ggtree=3.8.0
- bioconductor-ggtreeextra=1.10.0
- bioconductor-ggtree=3.10.0
- bioconductor-ggtreeextra=1.12.0
- bioconductor-biocversion=3.17.1
- bioconductor-treeio=1.24.1
- bioconductor-treeio=1.26.0
- r-dbi=1.1.3
- r-mass=7.3_60
- r-matrix=1.6_1.1
Expand Down Expand Up @@ -98,6 +98,7 @@ dependencies:
- r-lattice=0.22_5
- r-lazyeval=0.2.2
- r-lifecycle=1.0.4
- r-logr=1.3.8
- r-lubridate=1.9.3
- r-magrittr=2.0.3
- r-memoise=2.0.1
Expand Down Expand Up @@ -137,7 +138,8 @@ dependencies:
- r-sass=0.4.9
- r-scales=1.2.1
- r-selectr=0.4_2
- r-shiny=1.7.5
- r-shiny=1.8.1.1
- r-shinybs=0.61.1
- r-shinyFiles=0.9.3
- r-shinyWidgets=0.8.0
- r-shinydashboard=0.7.2
Expand Down
636 changes: 392 additions & 244 deletions README.md

Large diffs are not rendered by default.

13 changes: 8 additions & 5 deletions Report/Report.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -44,20 +44,23 @@ knitr::include_graphics(paste0(getwd(),"/PhyloTrace_bw.png"))

\

```{r echo=FALSE, fig.align = "center"}
```{r echo=FALSE, fig.align = "center", out.width='100%'}
if(!my_loaded_variables[["plot"]] == "MST"){
if(my_loaded_variables[["report_df.Include"]][13]) {
knitr::include_graphics(paste0(my_loaded_variables[["plot"]], ".jpeg"))
}
} else {
if(my_loaded_variables[["report_df.Include"]][13]) {
knitr::include_graphics(paste0(my_loaded_variables[["plot"]], ".jpg"))
}
}
```

\

```{r echo=FALSE}
if(my_loaded_variables[["report_df.Include"]][1]) {
kable(select(my_loaded_variables[["entry_table"]], -2),
kable(select(my_loaded_variables[["entry_table"]], my_loaded_variables[["table_columns"]]),
row.names = FALSE,
linesep = "") %>%
kable_styling(bootstrap_options = "striped", full_width = TRUE, font_size = 10) %>%
Expand Down Expand Up @@ -90,7 +93,7 @@ if(!is.null(my_loaded_variables[["na_handling"]])){
if(my_loaded_variables[["report_df.Include"]][10]){"Distance algorithm"},
if(my_loaded_variables[["report_df.Include"]][11]){"Missing value handling"},
if(my_loaded_variables[["report_df.Include"]][12]){"PhyloTrace version"},
if(my_loaded_variables[["report_df.Include"]][12]){"KMA version"}),
if(my_loaded_variables[["report_df.Include"]][12]){"pblat version"}),
Content = c(if(my_loaded_variables[["report_df.Include"]][9]){my_loaded_variables[["tree"]]},
if(my_loaded_variables[["report_df.Include"]][10]){my_loaded_variables[["distance"]]},
if(my_loaded_variables[["report_df.Include"]][11]){my_loaded_variables[["na_handling"]]},
Expand All @@ -101,7 +104,7 @@ if(!is.null(my_loaded_variables[["na_handling"]])){
df <- data.frame(Item = c(if(my_loaded_variables[["report_df.Include"]][9]){"Tree construction"},
if(my_loaded_variables[["report_df.Include"]][10]){"Distance algorithm"},
if(my_loaded_variables[["report_df.Include"]][12]){"PhyloTrace version"},
if(my_loaded_variables[["report_df.Include"]][12]){"KMA version"}),
if(my_loaded_variables[["report_df.Include"]][12]){"pblat version"}),
Content = c(if(my_loaded_variables[["report_df.Include"]][9]){my_loaded_variables[["tree"]]},
if(my_loaded_variables[["report_df.Include"]][10]){my_loaded_variables[["distance"]]},
if(my_loaded_variables[["report_df.Include"]][12]){my_loaded_variables[["version"]][1]},
Expand Down
14 changes: 10 additions & 4 deletions execute/check_duplicate_multi.R
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
library(logr)
# Get the command line arguments
args <- commandArgs(trailingOnly = TRUE)

Expand All @@ -12,8 +13,11 @@ log.message <- function(log_file, message) {
cat(format(Sys.time(), "%Y-%m-%d %H:%M:%S"), "-", message, "\n", file = log_file, append = TRUE)
}

log.message(log_file = paste0(base_path, "/logs/output.log"),
message = "Initiated multi typing fasta name duplicates check")
logfile <- file.path(paste0(base_path, "/logs/check_duplicate_multi.log"))

log <- log_open(logfile, logdir = FALSE)

log_print("Initiated multi typing fasta name duplicates check")

# load selected assemblies
assemblies <- lapply(list.files(paste0(getwd(), "/selected_genomes"), full.names = T), readLines)
Expand All @@ -25,12 +29,12 @@ for(i in 1:length(assemblies)){
# Test if there are duplicates
if(length(names) != length(unique(names))){

log.message(log_file = paste0(base_path, "/logs/output.log"),
message = paste0("Duplicate(s) present in ", basename(file_names[i])))
log_print(paste0("Duplicate(s) present in ", basename(file_names[i])))

# add a number to the duplicates
for (j in seq_along(names)) {
  if (sum(names == names[j]) > 1) {
    # Suffix the entry with its own position. Duplicates at earlier
    # positions have already been renamed by the time j is reached, so j is
    # the first remaining match. Assigning exactly one string avoids the
    # "number of items to replace" warning that a vector of every matching
    # index would trigger.
    names[j] <- paste0(names[j], "_", j)
  }
}
Expand All @@ -44,3 +48,5 @@ for(i in 1:length(assemblies)){
writeLines(assemblies[[i]], file_names[i])
}
}

log_close()
24 changes: 15 additions & 9 deletions execute/check_duplicate.R → execute/check_duplicate_single.R
Original file line number Diff line number Diff line change
@@ -1,12 +1,16 @@
library(logr)
typing_meta <- readRDS(paste0(getwd(), "/single_typing_df.rds"))

# Append one timestamped entry to a log file.
#   log_file: path of the file the entry is appended to
#   message:  text to record after the timestamp
# The entry is written as "<YYYY-MM-DD HH:MM:SS> - <message> " followed by a
# newline; cat() separates the pieces with single spaces.
log.message <- function(log_file, message) {
  timestamp <- format(Sys.time(), "%Y-%m-%d %H:%M:%S")
  cat(timestamp, "-", message, "\n", file = log_file, append = TRUE)
}

log.message(log_file = paste0(typing_meta$wd, "/logs/output.log"),
message = "Initiated single typing fasta name duplicates check")
logfile <- file.path(paste0(typing_meta$wd, "/logs/check_duplicate_single.log"))

log <- log_open(logfile, logdir = FALSE)

log_print("Initiated single typing fasta name duplicates check")

assembly <- typing_meta$genome

Expand All @@ -15,20 +19,20 @@ lines <- readLines(assembly)
names <- stringr::str_extract(lines[seq(1, length(lines), by = 3)], "^[^\\s]+")

# Test if there are duplicates
if(length(names) != length(unique(names))){
if(length(names) != length(unique(names))) {

log.message(log_file = paste0(typing_meta$wd, "/logs/output.log"),
message = paste0("Duplicate(s) present in ", basename(assembly)))
log_print(paste0("Duplicate(s) present in ", basename(assembly)))

# add a number to the duplicates
for(i in 1:length(names)){
if(sum(names == names[i]) > 1){
names[i] <- paste0(names[i], "_", which(names == names[i]))
for (i in seq_along(names)) {
  if (sum(names == names[i]) > 1) {
    # Suffix the entry with its own position i. Looking i up among the
    # indices that match names[i] always returns i itself, so the position is
    # used directly.
    names[i] <- paste0(names[i], "_", i)
  }
}

# substitute the respective lines in the file with the new names
for(i in 1:length(names)){
for(i in 1:length(names)) {
lines[3*i - 2] <- paste0(names[i])
}

Expand All @@ -37,3 +41,5 @@ if(length(names) != length(unique(names))){
} else {
writeLines(lines, paste0(getwd(), "/blat_single/assembly.fasta"))
}

log_close()
4 changes: 2 additions & 2 deletions execute/kill_multi.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
unset R_HOME

log_file='execute/script_log.txt'
blat_multi='execute/blat_multi.sh'
automatic_typing='automatic_typing.R'
blat_multi='execute/multi_typing.sh'
automatic_typing='multi_eval.R'

# Function to log messages to the file
log_message() {
Expand Down
23 changes: 14 additions & 9 deletions execute/automatic_typing.R → execute/multi_eval.R
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
library(logr)

meta_info <- readRDS("meta_info.rds")
db_path <- readRDS("multi_typing_df.rds")[, "db_path"]
assembly_folder <- dir(paste0(getwd(), "/selected_genomes"), full.names = TRUE)
Expand Down Expand Up @@ -26,8 +28,11 @@ log.message <- function(log_file, message) {
cat(format(Sys.time(), "%Y-%m-%d %H:%M:%S"), "-", message, "\n", file = log_file, append = TRUE)
}

log.message(log_file = paste0(meta_info$db_directory, "/logs/output.log"),
message = "Attaching initiated (automatic_typing.R)")
logfile <- file.path(paste0(getwd(), "/logs/multi_eval.log"))

log <- log_open(logfile, logdir = FALSE)

log_print("Attaching initiated")

# Define start and stop codons
start_codons <- c("ATG", "GTG", "TTG")
Expand Down Expand Up @@ -309,18 +314,18 @@ if(sum(unname(base::sapply(psl_files, file.size)) <= 427) / length(psl_files) <=
saveRDS(Database, paste0(db_path, "/", gsub(" ", "_", meta_info$cgmlst_typing), "/Typing.rds"))

# Logging successes
log.message(log_file = paste0(getwd(), "/execute/script_log.txt"),
message = paste0("Successful typing of ", sub("\\.(fasta|fna|fa)$", "", basename(assembly))))
log.message(log_file = paste0(meta_info$db_directory, "/logs/output.log"),
log.message(log_file = paste0(getwd(), "/logs/script_log.txt"),
message = paste0("Successful typing of ", sub("\\.(fasta|fna|fa)$", "", basename(assembly))))
log_print(paste0("Successful typing of ", sub("\\.(fasta|fna|fa)$", "", basename(assembly))))

} else {

# Logging failures
log.message(log_file = paste0(getwd(), "/execute/script_log.txt"),
message = paste0("Assembly typing failed for ",
sub("\\.(fasta|fna|fa)$", "", basename(assembly))))
log.message(log_file = paste0(meta_info$db_directory, "/logs/output.log"),
log.message(log_file = paste0(getwd(), "/logs/script_log.txt"),
message = paste0("Assembly typing failed for ",
sub("\\.(fasta|fna|fa)$", "", basename(assembly))))
log_print(paste0("Assembly typing failed for ",
sub("\\.(fasta|fna|fa)$", "", basename(assembly))))
}

log_close()
14 changes: 3 additions & 11 deletions execute/blat_multi.sh → execute/multi_typing.sh
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,7 @@ fi
mkdir "$results"

selected_genomes="$base_path/execute/selected_genomes"
log_file="$base_path/execute/script_log.txt"
output_file="$base_path/logs/output.log"

echo "$(date +"%Y-%m-%d %H:%M:%S") - Initiated blat_multi.sh" >> "$output_file"
log_file="$base_path/logs/script_log.txt"

# Create a log file or truncate if it exists
echo "Start Multi Typing with $scheme scheme." > "$log_file"
Expand All @@ -47,10 +44,8 @@ for file in "${file_names[@]}"; do
if [ -f "$genome_folder/$file" ]; then
cp "$genome_folder/$file" "$selected_genomes/"
echo "$(date +"%Y-%m-%d %H:%M:%S") - Initiated $file" >> "$log_file"
echo "$(date +"%Y-%m-%d %H:%M:%S") - Initiated $file" >> "$output_file"
else
echo "$(date +"%Y-%m-%d %H:%M:%S") - $file not found in $genome_folder" >> "$log_file"
echo "$(date +"%Y-%m-%d %H:%M:%S") - $file not found in $genome_folder" >> "$output_file"
fi
done

Expand All @@ -70,18 +65,15 @@ for genome in "$selected_genomes"/*; do
genome_filename=$(basename "$genome")
genome_filename_noext="${genome_filename%.*}"
echo "$(date +"%Y-%m-%d %H:%M:%S") - Processing $genome_filename" >> "$log_file"
echo "$(date +"%Y-%m-%d %H:%M:%S") - Processing $genome_filename" >> "$output_file"
fi
mkdir "$results/$genome_filename_noext"

result_folder="$results/$genome_filename_noext"

# Run parallelized BLAT
find "$alleles" -type f \( -name "*.fasta" -o -name "*.fa" -o -name "*.fna" \) | parallel pblat $genome {} "$result_folder/{/.}.psl"
find "$alleles" -type f \( -name "*.fasta" -o -name "*.fa" -o -name "*.fna" \) | parallel pblat $genome {} "$result_folder/{/.}.psl" > /dev/null 2>&1

echo "$(date +"%Y-%m-%d %H:%M:%S") - Attaching $genome_filename" >> "$log_file"
echo "$(date +"%Y-%m-%d %H:%M:%S") - Initiating addition of $genome_filename (attaching)" >> "$output_file"
Rscript "$base_path/execute/automatic_typing.R" "$genome_filename"
Rscript "$base_path/execute/multi_eval.R" "$genome_filename"
done
echo "$(date +"%Y-%m-%d %H:%M:%S") - Multi Typing finalized." >> "$log_file"
echo "$(date +"%Y-%m-%d %H:%M:%S") - Multi Typing finalized." >> "$output_file"
58 changes: 28 additions & 30 deletions execute/single_typing.R → execute/single_eval.R
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
library(logr)

# Hand over variables
meta_info <- readRDS("meta_info_single.rds")
db_path <- readRDS("single_typing_df.rds")[, "db_path"]
Expand Down Expand Up @@ -25,8 +27,11 @@ log.message <- function(log_file, message) {
cat(format(Sys.time(), "%Y-%m-%d %H:%M:%S"), "- ", message, "\n", file = log_file, append = TRUE)
}

log.message(log_file = paste0(meta_info$db_directory, "/logs/output.log"),
message = "Attaching initiated (single_typing.R)")
logfile <- file.path(paste0(getwd(), "/logs/single_eval.log"))

log <- log_open(logfile, logdir = FALSE)

log_print("Attaching initiated")

# Define start and stop codons
start_codons <- c("ATG", "GTG", "TTG")
Expand Down Expand Up @@ -142,34 +147,27 @@ if(sum(unname(base::sapply(psl_files, file.size)) <= 427) / length(psl_files) <=

Database <- list(Typing = data.frame())

Typing <-
data.frame(matrix(
NA,
nrow = 0,
ncol = 12 + length(psl_files)
))
Typing <- data.frame(matrix(NA, nrow = 0, ncol = 12 + length(psl_files)))

metadata <-
c(
1,
TRUE,
meta_info$assembly_id,
meta_info$assembly_name,
meta_info$cgmlst_typing,
as.character(meta_info$append_isodate),
meta_info$append_host,
meta_info$append_country,
meta_info$append_city,
as.character(meta_info$append_analysisdate),
length(allele_vector) - sum(sapply(allele_vector, is.na)),
sum(sapply(allele_vector, is.na))
)
metadata <- c(
1,
TRUE,
meta_info$assembly_id,
meta_info$assembly_name,
meta_info$cgmlst_typing,
as.character(meta_info$append_isodate),
meta_info$append_host,
meta_info$append_country,
meta_info$append_city,
as.character(meta_info$append_analysisdate),
length(allele_vector) - sum(sapply(allele_vector, is.na)),
sum(sapply(allele_vector, is.na))
)

new_row <- c(metadata, allele_vector)

Typing <- rbind(Typing, new_row)


colnames(Typing) <-
append(
c(
Expand Down Expand Up @@ -298,18 +296,18 @@ if(sum(unname(base::sapply(psl_files, file.size)) <= 427) / length(psl_files) <=
saveRDS(Database, paste0(db_path, "/", gsub(" ", "_", meta_info$cgmlst_typing), "/Typing.rds"))

# Logging successes
log.message(log_file = paste0(getwd(), "/execute/single_typing_log.txt"),
message = paste0("Successful typing of ", meta_info$assembly_name))
log.message(log_file = paste0(getwd(), "/logs/output.log"),
log.message(log_file = paste0(getwd(), "/logs/single_typing_log.txt"),
message = paste0("Successful typing of ", meta_info$assembly_name))
log_print(paste0("Successful typing of ", meta_info$assembly_name))

} else {

failures <- sum(unname(base::sapply(psl_files, file.size)) <= 100) / length(psl_files) * 100

# Logging failures
log.message(log_file = paste0(getwd(), "/execute/single_typing_log.txt"),
message = paste0("Assembly typing of ", meta_info$assembly_name, " failed. ", failures, "% of loci not typed."))
log.message(log_file = paste0(getwd(), "/logs/output.log"),
log.message(log_file = paste0(getwd(), "/logs/single_typing_log.txt"),
message = paste0("Assembly typing of ", meta_info$assembly_name, " failed. ", failures, "% of loci not typed."))
log_print(paste0("Assembly typing of ", meta_info$assembly_name, " failed. ", failures, "% of loci not typed."))
}

log_close()
9 changes: 3 additions & 6 deletions execute/blat_run.sh → execute/single_typing.sh
Original file line number Diff line number Diff line change
Expand Up @@ -35,20 +35,17 @@ fi
mkdir "$results"

# Check assembly file and save in the execute folder
Rscript "$base_path/execute/check_duplicate.R"
Rscript "$base_path/execute/check_duplicate_single.R"
wait
genome="$base_path/execute/blat_single/assembly.fasta"

# Run parallelized BLAT
parallel --citation
find "$alleles" -type f \( -name "*.fasta" -o -name "*.fa" -o -name "*.fna" \) | parallel pblat $genome {} "$results/{/.}.psl"
find "$alleles" -type f \( -name "*.fasta" -o -name "*.fa" -o -name "*.fna" \) | parallel pblat $genome {} "$results/{/.}.psl" > /dev/null 2>&1

# Start appending results
echo 888888 >> "$base_path/logs/progress.txt"
echo "Initiating addition of $genome_name (attaching)" >> "$base_path/logs/output.log"
Rscript "$base_path/execute/single_typing.R"
Rscript "$base_path/execute/single_eval.R"
wait

# Single typing finalized
echo 999999 >> "$base_path/logs/progress.txt"
echo "Finished typing of $genome_name" >> "$base_path/logs/output.log"
Loading

0 comments on commit 78921d2

Please sign in to comment.