From 1471e0324c5ae85448f7e8637cbbb536bfd6ac2f Mon Sep 17 00:00:00 2001 From: wala-github <153521607+wala-github@users.noreply.github.com> Date: Mon, 6 May 2024 15:09:24 +0200 Subject: [PATCH 1/5] Add files via upload --- .../scripts/AC2-Quality_Visualisaition.Rmd | 221 ++++++++++++++++++ 1 file changed, 221 insertions(+) create mode 100644 dynamic_report/workflow/scripts/AC2-Quality_Visualisaition.Rmd diff --git a/dynamic_report/workflow/scripts/AC2-Quality_Visualisaition.Rmd b/dynamic_report/workflow/scripts/AC2-Quality_Visualisaition.Rmd new file mode 100644 index 00000000..ff7f7e92 --- /dev/null +++ b/dynamic_report/workflow/scripts/AC2-Quality_Visualisaition.Rmd @@ -0,0 +1,221 @@ +--- +title: "Quality visualisation" +author: "Wanxin_Lai" +date: "2024-05-03" +output: html_document +--- + +```{r message=FALSE, warning=FALSE, include=FALSE} +library(tidyverse) +library(ggtree) +library(phytools) +library(scales) +library(ggnewscale) +library(ggh4x) +library(gridExtra) +library(ggtreeExtra) +library(DT) +library(pheatmap) + +``` + +```{r message = FALSE, warning=FALSE} +# Import read raw input +checkm2_raw <- read_tsv(glob_list$checkm2, col_names = T) %>% + mutate(mag_size=round(Genome_Size/1000000, 2)) + +# Import Taxa info +gtdbtk_df = read_tsv(glob_list$gtdbtk) %>% + select(sample = user_genome, classification) %>% + identity() + +#Import tree info +mashtree_file <- read.tree(glob_list$mashtree) + +``` + +```{r message=FALSE} +# GTDBTK taxa +SampleTax <- gtdbtk_df %>% + column_to_rownames("sample") %>% + separate(col = 1, into = c("Domain", "Phylum", "Class", "Order", "Family", "Genus", "Species"), sep = ";") %>% + rownames_to_column("sample") %>% + mutate_at(.vars=vars(Species), + .funs = ~ + str_remove(., + pattern="NA")) %>% + mutate_at(.vars=vars(Species), + .funs = ~ + str_remove(., + pattern = ".*[[:space:]]")) %>% + mutate_at(.vars=vars(Species), + .funs = ~ + str_remove(., + pattern = "(?<=sp)\\d.*")) %>% + mutate_at(.vars = vars(Domain), + .funs = ~ + str_remove_all(., + pattern = ".*d__")) %>% + mutate_at(.vars = vars(Phylum), + .funs = ~ + str_remove_all(., + pattern = ".*p__")) %>% + mutate_at(.vars = vars(Phylum), + .funs = ~ + str_remove_all(., + pattern = "_..*")) %>% + mutate_at(.vars = vars(Class), + .funs = ~ + str_remove_all(., + pattern = ".*c__")) %>% + mutate_at(.vars = vars(Order), + .funs = ~ + str_remove_all(., + pattern = ".*o__")) %>% + mutate_at(.vars = vars(Family), + .funs = ~ + str_remove_all(., + pattern = ".*f__")) %>% + mutate_at(.vars = vars(Genus), + .funs = ~ + str_remove_all(., + pattern = ".*g__")) %>% + mutate_at(.vars = vars(Genus), + .funs = ~ + str_remove_all(., + pattern = "_..*")) %>% + mutate_at(.vars = vars(Species), + .funs = ~ + str_remove_all(., + pattern = ".*s__")) %>% + mutate(Family = if_else(Family == "NA", Order, Family)) %>% + mutate(Genus = if_else(Genus == "NA", Family, Genus)) + +#convert empty string to sp +SampleTax$Species <- replace(SampleTax$Species, SampleTax$Species == "", "sp") +``` + + +```{r message = FALSE} +# Assign colour to each phylum +phylum_colour <- data.frame(Phylum = sort(unique(SampleTax$Phylum)), color = hue_pal()(12)) + +mags_phylum_colour <- SampleTax %>% + left_join(phylum_colour, by = join_by(Phylum == Phylum)) + +# Generate phylum color heatmap +heatmap <- mags_phylum_colour %>% arrange(match(sample, mashtree_file$tip.label)) %>% + select(sample,Phylum) %>% + mutate(Phylum = factor(Phylum, levels = unique(Phylum))) %>% + column_to_rownames(var = "sample") +``` + +```{r, warning=FALSE} +# baseline tree +circular_tree <- force.ultrametric(mashtree_file, method="extend") %>% # extend to ultrametric for visualisation + ggtree(., layout="circular", size=0.1, open.angle = 45) + +# Add phylum ring +circular_tree <- gheatmap(circular_tree, heatmap, offset=0.60, width=0.1, colnames=FALSE) + + geom_tiplab2(size=0.8, hjust=-0.1) + + theme(legend.position = "right", legend.title = element_text("Phylum"), plot.margin = margin(0, 0, 0, 0), panel.margin = margin(0, 0, 0, 0)) + + +# flush old color scheme in the ring +circular_tree <- circular_tree + new_scale_fill() + +# Add completeness ring +circular_tree <- circular_tree + + new_scale_fill() + + scale_fill_gradient(low = "#d1f4ba", high = "#f4baba") + + geom_fruit( + data=checkm2_raw, + geom=geom_bar, + mapping = aes(x=Completeness, y=Name, fill=Contamination), + offset = 0.8, + orientation="y", + stat="identity") + +# Add genome-size ring +circular_tree <- circular_tree + + new_scale_fill() + + scale_fill_manual(values = "#cccccc") + + geom_fruit( + data=checkm2_raw, + geom=geom_bar, + mapping = aes(x=mag_size, y=Name), + offset = 0.05, + orientation="y", + stat="identity") + +# Add text +circle_tree <- circular_tree + + annotate('text', x=1.7, y=0.0, label=' Phylum', size=3) + + annotate('text', x=1.9, y=0.0, label=' Genome quality', size=3) + + annotate('text', x=2.1, y=0.0, label=' Genome size', size=3) + +circle_tree %>% + open_tree(20) %>% rotate_tree(90) +``` + + +```{r warning=FALSE, message=FALSE} + +# Add text# First create a biplot chart + mags_details <- checkm2_raw %>% + left_join(SampleTax, by=join_by(Name == sample)) %>% + left_join(phylum_colour, by = join_by(Phylum == Phylum)) + +mag_stats_biplot <- mags_details %>% + ggplot(aes(x=Completeness,y=Contamination,size=Genome_Size,color=Phylum)) + + geom_point(alpha=0.7) + + ylim(c(10,0)) + + labs(y= "Contamination", x = "Completeness") + + theme_classic() + + theme(legend.position = "bottom") + +# X and Y axis boxplots to complement the plot and MAG quality statistics. +mag_stats_cont <- mags_details %>% + ggplot(aes(y=Contamination)) + + ylim(c(10,0)) + + geom_boxplot(colour = "#999999", fill="#cccccc") + + theme_void() + + theme(legend.position = "none", + axis.title.x = element_blank(), + axis.title.y = element_blank(), + axis.text.y=element_blank(), + axis.ticks.y=element_blank(), + axis.text.x=element_blank(), + axis.ticks.x=element_blank(), + plot.margin = unit(c(0, 0, 0.40, 0),"inches")) #add bottom-margin (top, right, bottom, left) + +mag_stats_comp <-mags_details %>% + ggplot(aes(x=Completeness)) + + xlim(c(50,100)) + + geom_boxplot(colour = "#999999", fill="#cccccc") + + theme_void() + + theme(legend.position = "none", + axis.title.x = element_blank(), + axis.title.y = element_blank(), + axis.text.y=element_blank(), + axis.ticks.y=element_blank(), + axis.text.x=element_blank(), + axis.ticks.x=element_blank(), + plot.margin = unit(c(0, 0, 0, 0.50),"inches")) #add left-margin (top, right, bottom, left) + + +grid.arrange(grobs = list(mag_stats_comp,mag_stats_biplot,mag_stats_cont), + layout_matrix = rbind(c(1,1,1,1,1,1,1,1,1,1,1,4), + c(2,2,2,2,2,2,2,2,2,2,2,3), + c(2,2,2,2,2,2,2,2,2,2,2,3), + c(2,2,2,2,2,2,2,2,2,2,2,3), + c(2,2,2,2,2,2,2,2,2,2,2,3), + c(2,2,2,2,2,2,2,2,2,2,2,3), + c(2,2,2,2,2,2,2,2,2,2,2,3), + c(2,2,2,2,2,2,2,2,2,2,2,3), + c(2,2,2,2,2,2,2,2,2,2,2,3), + c(2,2,2,2,2,2,2,2,2,2,2,3), + c(2,2,2,2,2,2,2,2,2,2,2,3), + c(2,2,2,2,2,2,2,2,2,2,2,3))) +``` + From f8164c3ddf50cc9bb9a2a2213c689bad228ecdf7 Mon Sep 17 00:00:00 2001 From: wala-github <153521607+wala-github@users.noreply.github.com> Date: Mon, 6 May 2024 15:11:53 +0200 Subject: [PATCH 2/5] Update report_template.rmd --- dynamic_report/workflow/scripts/report_template.rmd | 3 +++ 1 file changed, 3 insertions(+) diff --git a/dynamic_report/workflow/scripts/report_template.rmd b/dynamic_report/workflow/scripts/report_template.rmd index 5749d527..73f3f265 100644 --- a/dynamic_report/workflow/scripts/report_template.rmd +++ b/dynamic_report/workflow/scripts/report_template.rmd @@ -373,6 +373,9 @@ section_definition_table %>% ```{r section_gtdbtk, child = paste0(report_scripts, "section_gtdbtk.rmd"), eval = render$gtdbtk} ``` +```{r AC2-Quality_Visualisaition, child = paste0(report_scripts, "AC2-Quality_Visualisaition.rmd"), eval = render$qualityvisualisation} +``` + ```{r section_mlst, child = paste0(report_scripts, "section_mlst.rmd"), eval = render$mlst} ``` From b32832c27ebdc5c366b4a6843c5c4e14463b3f4b Mon Sep 17 00:00:00 2001 From: wala-github <153521607+wala-github@users.noreply.github.com> Date: Thu, 9 May 2024 13:51:47 +0200 Subject: [PATCH 3/5] Update AC2-Quality_Visualisaition.Rmd Set the number of taxa accordingly --- dynamic_report/workflow/scripts/AC2-Quality_Visualisaition.Rmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dynamic_report/workflow/scripts/AC2-Quality_Visualisaition.Rmd b/dynamic_report/workflow/scripts/AC2-Quality_Visualisaition.Rmd index ff7f7e92..9295b88c 100644 --- a/dynamic_report/workflow/scripts/AC2-Quality_Visualisaition.Rmd +++ b/dynamic_report/workflow/scripts/AC2-Quality_Visualisaition.Rmd @@ -98,7 +98,7 @@ SampleTax$Species <- replace(SampleTax$Species, SampleTax$Species == "", "sp") ```{r message = FALSE} # Assign colour to each phylum -phylum_colour <- data.frame(Phylum = sort(unique(SampleTax$Phylum)), color = hue_pal()(12)) +phylum_colour <- data.frame(Phylum = sort(unique(SampleTax$Phylum)), color = hue_pal()(length(unique(SampleTax$Phylum)))) mags_phylum_colour <- SampleTax %>% left_join(phylum_colour, by = join_by(Phylum == Phylum)) From 4902b3f289706a069cffcaa12a2bf8588db7c0c1 Mon Sep 17 00:00:00 2001 From: wala-github <153521607+wala-github@users.noreply.github.com> Date: Thu, 6 Jun 2024 19:32:49 +0200 Subject: [PATCH 4/5] Update report_template.rmd --- .../workflow/scripts/report_template.rmd | 33 +++++++++++-------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/dynamic_report/workflow/scripts/report_template.rmd b/dynamic_report/workflow/scripts/report_template.rmd index 73f3f265..7b07141f 100644 --- a/dynamic_report/workflow/scripts/report_template.rmd +++ b/dynamic_report/workflow/scripts/report_template.rmd @@ -291,13 +291,13 @@ section_definition_table = tribble( ~section, ~expectation, ~glob, - "assembly_stats", 1, Sys.glob(paste0(results_directory, "/assembly-stats/assembly-stats.tsv")), - "sequence_lengths", N, Sys.glob(paste0(results_directory, "/samples/*/sequence_lengths/*.tsv")), - "busco", N, Sys.glob(paste0(results_directory, "/samples/*/busco/short_summary_extract.tsv")), - "checkm2", 1, Sys.glob(paste0(results_directory, "/checkm2/quality_report.tsv")), - "gtdbtk", 1, Sys.glob(paste0(results_directory, "/gtdbtk/gtdbtk.summary.tsv")), - "mlst", 1, Sys.glob(paste0(results_directory, "/mlst/mlst.tsv")), - "abricate", 4, Sys.glob( + "assembly_stats", 1, Sys.glob(paste0(results_directory, "/assembly-stats/assembly-stats.tsv")), + "sequence_lengths", N, Sys.glob(paste0(results_directory, "/samples/*/sequence_lengths/*.tsv")), + "busco", N, Sys.glob(paste0(results_directory, "/samples/*/busco/short_summary_extract.tsv")), + "checkm2", 1, Sys.glob(paste0(results_directory, "/checkm2/quality_report.tsv")), + "gtdbtk", 1, Sys.glob(paste0(results_directory, "/gtdbtk/gtdbtk.summary.tsv")), + "mlst", 1, Sys.glob(paste0(results_directory, "/mlst/mlst.tsv")), + "abricate", 4, Sys.glob( c( paste0(results_directory, "/abricate/ncbi_detailed.tsv"), paste0(results_directory, "/abricate/card_detailed.tsv"), @@ -305,13 +305,18 @@ section_definition_table = tribble( paste0(results_directory, "/abricate/vfdb_detailed.tsv") ) ), - "prokka", N, Sys.glob(paste0(results_directory, "/samples/*/prokka/*.txt")), - "kegg_pathway", 1, Sys.glob(paste0(results_directory, "/kegg_pathway/kegg_pathway_enrichment_analysis.tsv")), - "dbcan", N, Sys.glob(paste0(results_directory, "/samples/*/dbcan/dbcan-sub.hmm.out")), - "panaroo", 1, Sys.glob(paste0(results_directory, "/panaroo/gene_presence_absence.Rtab")), # This also counts for the panaroo summary file. - "snp_dists", 1, Sys.glob(paste0(results_directory, "/snp-dists/snp-dists.tsv")), - "mashtree", 1, Sys.glob(mashtree_voluntary_dependency) # Reuse from earlier. - + "prokka", N, Sys.glob(paste0(results_directory, "/samples/*/prokka/*.txt")), + "kegg_pathway", 1, Sys.glob(paste0(results_directory, "/kegg_pathway/kegg_pathway_enrichment_analysis.tsv")), + "dbcan", N, Sys.glob(paste0(results_directory, "/samples/*/dbcan/dbcan-sub.hmm.out")), + "panaroo", 1, Sys.glob(paste0(results_directory, "/panaroo/gene_presence_absence.Rtab")), # This also counts for the panaroo summary file. + "snp_dists", 1, Sys.glob(paste0(results_directory, "/snp-dists/snp-dists.tsv")), + "mashtree", 1, Sys.glob(mashtree_voluntary_dependency), # Reuse from earlier. + "qualityvisualisation", 3, Sys.glob( + c( + paste0(results_directory, "/checkm2/quality_report.tsv"), + paste0(results_directory, "/gtdbtk/gtdbtk.summary.tsv"), + mashtree_voluntary_dependency + ) ) %>% mutate(n_files = lengths(glob)) From 171b9afe101becbffbaf7f94941cc2c2bce693e6 Mon Sep 17 00:00:00 2001 From: wala-github <153521607+wala-github@users.noreply.github.com> Date: Thu, 6 Jun 2024 19:38:34 +0200 Subject: [PATCH 5/5] Update and rename AC2-Quality_Visualisaition.Rmd to AC2-Quality_Visualisation.Rmd --- ...2-Quality_Visualisaition.Rmd => AC2-Quality_Visualisation.Rmd} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename dynamic_report/workflow/scripts/{AC2-Quality_Visualisaition.Rmd => AC2-Quality_Visualisation.Rmd} (100%) diff --git a/dynamic_report/workflow/scripts/AC2-Quality_Visualisaition.Rmd b/dynamic_report/workflow/scripts/AC2-Quality_Visualisation.Rmd similarity index 100% rename from dynamic_report/workflow/scripts/AC2-Quality_Visualisaition.Rmd rename to dynamic_report/workflow/scripts/AC2-Quality_Visualisation.Rmd