diff --git a/CHANGELOG.md b/CHANGELOG.md
index 8b7e3f90..b76da9b9 100755
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -10,15 +10,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
Code contributions to the new version:
-- [Daniel Valle](https://github.com/Daniel-VM)
-- [Sarai Varona](https://github.com/svarona)
-
### Template fixes and updates
-- Fixed path to blast database and update Emmtyper params [#339](https://github.com/BU-ISCIII/buisciii-tools/pull/339)
-- Updated sarek version (v3.4.4) in ExomeEB-ExomeTrio-WGSTrio templates [#341] (https://github.com/BU-ISCIII/buisciii-tools/pull/341)
-- Fixed IRMAs config for amended consensus [#325](https://github.com/BU-ISCIII/buisciii-tools/pull/325).
-
### Modules
#### Added enhancements
@@ -51,6 +44,36 @@ Code contributions to the hotfix:
### Requirements
+## [2.2.1] - 2024-10-01 : https://github.com/BU-ISCIII/buisciii-tools/releases/tag/2.2.1
+
+### Credits
+
+Code contributions to the new version:
+
+- [Daniel Valle](https://github.com/Daniel-VM)
+- [Sarai Varona](https://github.com/svarona)
+- [Victor Lopez](https://github.com/victor5lm)
+- [Sergio Olmos](https://github.com/OPSergio)
+
+### Template fixes and updates
+
+- Fixed path to BLAST database and updated Emmtyper params [#339](https://github.com/BU-ISCIII/buisciii-tools/pull/339)
+- Updated sarek version (v3.4.4) in ExomeEB-ExomeTrio-WGSTrio templates [#341](https://github.com/BU-ISCIII/buisciii-tools/pull/341)
+- Fixed IRMA's config for amended consensus [#325](https://github.com/BU-ISCIII/buisciii-tools/pull/325).
+- Improved excel_generator.py and the email creation function in bioinfo_doc.py, and updated sftp_user.json, setup.py, __main__.py and some lablogs [#344](https://github.com/BU-ISCIII/buisciii-tools/pull/344).
+
+### Modules
+
+#### Added enhancements
+
+#### Fixes
+
+#### Changed
+
+#### Removed
+
+### Requirements
+
## [2.2.0] - 2024-09-12 : https://github.com/BU-ISCIII/buisciii-tools/releases/tag/2.2.0
### Credits
diff --git a/bu_isciii/__main__.py b/bu_isciii/__main__.py
index 3933a027..1aebd780 100755
--- a/bu_isciii/__main__.py
+++ b/bu_isciii/__main__.py
@@ -57,7 +57,7 @@ def run_bu_isciii():
)
# stderr.print("[green] `._,._,'\n", highlight=False)
- __version__ = "2.2.0"
+ __version__ = "2.2.1"
stderr.print(
"[grey39] BU-ISCIII-tools version {}".format(__version__), highlight=False
)
diff --git a/bu_isciii/bioinfo_doc.py b/bu_isciii/bioinfo_doc.py
old mode 100755
new mode 100644
index 4da84672..519cb382
--- a/bu_isciii/bioinfo_doc.py
+++ b/bu_isciii/bioinfo_doc.py
@@ -646,10 +646,43 @@ def email_creation(self):
email_data["email_notes"] = self.delivery_notes.replace(
"\n", "
"
)
+ else:
+ email_data["email_notes"] = bu_isciii.utils.ask_for_some_text(
+ msg="Write email notes"
+ ).replace("\n", "<br>")
else:
- email_data["email_notes"] = bu_isciii.utils.ask_for_some_text(
- msg="Write email notes"
- ).replace("\n", "<br>")
+ if bu_isciii.utils.prompt_yn_question(
+ msg="Do you wish to provide a text file for email notes?",
+ dflt=False,
+ ):
+ for i in range(3, -1, -1):
+ email_data["email_notes"] = bu_isciii.utils.prompt_path(
+ msg="Write the path to the file with RAW text as email notes"
+ )
+ if not os.path.isfile(
+ os.path.expanduser(email_data["email_notes"])
+ ):
+ stderr.print(
+ f"Provided file doesn't exist. Attempts left: {i}"
+ )
+ else:
+ stderr.print(f"File selected: {email_data['email_notes']}")
+ break
+ else:
+ stderr.print(
+ "No more attempts. Email notes will be given by prompt"
+ )
+ email_data["email_notes"] = None
+ else:
+ email_data["email_notes"] = None
+
+ if email_data["email_notes"]:
+ with open(os.path.expanduser(email_data["email_notes"])) as f:
+ email_data["email_notes"] = f.read().replace("\n", "
")
+ else:
+ email_data["email_notes"] = bu_isciii.utils.ask_for_some_text(
+ msg="Write email notes"
+ ).replace("\n", "<br>")
email_data["user_data"] = self.resolution_info["service_user_id"]
email_data["service_id"] = self.service_name.split("_", 5)[0]
diff --git a/bu_isciii/templates/exomeeb/ANALYSIS/ANALYSIS01_EXOME/03-annotation/lablog b/bu_isciii/templates/exomeeb/ANALYSIS/ANALYSIS01_EXOME/03-annotation/lablog
index 61892d9b..ef8d93ad 100644
--- a/bu_isciii/templates/exomeeb/ANALYSIS/ANALYSIS01_EXOME/03-annotation/lablog
+++ b/bu_isciii/templates/exomeeb/ANALYSIS/ANALYSIS01_EXOME/03-annotation/lablog
@@ -22,7 +22,7 @@ echo "srun --partition short_idx --time 2:00:00 --chdir ${scratch_dir} --output
## 4-5. Lablog for annotating whole genome samples using Variant Effect Predictor (VEP).
-echo "srun --partition short_idx --mem 100G --time 4:00:00 --chdir ${scratch_dir} --output logs/VEP.log --job-name VEP singularity exec -B ${scratch_dir}/../../../ /data/bi/pipelines/singularity-images/ensembl-vep:103.1--pl5262h4a94de4_2 vep --fasta /data/bi/references/eukaria/homo_sapiens/hg19/1000genomes_b37/genome/human_g1k_v37.fasta -i ${scratch_dir}/vep/variants_fil_mod.vcf -o ${scratch_dir}/vep/vep_annot.vcf --cache --offline --dir_cache /data/bi/references/eukaria/homo_sapiens/cache_vep/ --everything --dir_plugins /data/bi/references/eukaria/homo_sapiens/cache_vep/Plugins/ --assembly GRCh37 --tab --plugin dbNSFP,/data/bi/references/eukaria/homo_sapiens/cache_vep/custom_databases/dbNSFP/GRCh37/dbNSFP4.1a_grch37.gz,clinvar_id,clinvar_trait,clinvar_OMIM_id,clinvar_Orphanet_id,HGVSc_snpEff,HGVSp_snpEff,SIFT_score,SIFT_pred,Polyphen2_HDIV_score,Polyphen2_HDIV_pred,Polyphen2_HVAR_score,Polyphen2_HVAR_pred,MutationTaster_score,MutationTaster_pred,MutationAssessor_score,MutationAssessor_pred,FATHMM_score,FATHMM_pred,PROVEAN_score,PROVEAN_pred,VEST4_score,MetaSVM_score,MetaSVM_pred,MetaLR_score,MetaLR_pred,CADD_raw,CADD_phred,CADD_raw_hg19,CADD_phred_hg19,GERP++_NR,GERP++_RS,phyloP100way_vertebrate,phastCons100way_vertebrate &" > _02_vep_annotation.sh
+echo "srun --partition short_idx --mem 100G --time 4:00:00 --chdir ${scratch_dir} --output logs/VEP.log --job-name VEP singularity exec -B /data/bi/references/eukaria/homo_sapiens/hg19/1000genomes_b37/genome/ -B /data/bi/references/eukaria/homo_sapiens/cache_vep/homo_sapiens -B /data/bi/references/eukaria/homo_sapiens/cache_vep/custom_databases/dbNSFP/GRCh37/ -B ${scratch_dir}/../../../ /data/bi/pipelines/singularity-images/ensembl-vep:103.1--pl5262h4a94de4_2 vep --fasta /data/bi/references/eukaria/homo_sapiens/hg19/1000genomes_b37/genome/human_g1k_v37.fasta -i ${scratch_dir}/vep/variants_fil_mod.vcf -o ${scratch_dir}/vep/vep_annot.vcf --cache --offline --dir_cache /data/bi/references/eukaria/homo_sapiens/cache_vep/ --everything --dir_plugins /data/bi/references/eukaria/homo_sapiens/cache_vep/Plugins/ --assembly GRCh37 --tab --plugin dbNSFP,/data/bi/references/eukaria/homo_sapiens/cache_vep/custom_databases/dbNSFP/GRCh37/dbNSFP4.1a_grch37.gz,clinvar_id,clinvar_trait,clinvar_OMIM_id,clinvar_Orphanet_id,HGVSc_snpEff,HGVSp_snpEff,SIFT_score,SIFT_pred,Polyphen2_HDIV_score,Polyphen2_HDIV_pred,Polyphen2_HVAR_score,Polyphen2_HVAR_pred,MutationTaster_score,MutationTaster_pred,MutationAssessor_score,MutationAssessor_pred,FATHMM_score,FATHMM_pred,PROVEAN_score,PROVEAN_pred,VEST4_score,MetaSVM_score,MetaSVM_pred,MetaLR_score,MetaLR_pred,CADD_raw,CADD_phred,CADD_raw_hg19,CADD_phred_hg19,GERP++_NR,GERP++_RS,phyloP100way_vertebrate,phastCons100way_vertebrate &" > _02_vep_annotation.sh
echo "grep -v '^##' ./vep/vep_annot.vcf > ./vep/vep_annot_head.txt" > _03_merge_data1.sh
echo "sed -i 's/#Uploaded_variation/ID/' ./vep/vep_annot_head.txt" >> _03_merge_data1.sh
diff --git a/bu_isciii/templates/exometrio/ANALYSIS/ANALYSIS01_EXOME/03-annotation/lablog b/bu_isciii/templates/exometrio/ANALYSIS/ANALYSIS01_EXOME/03-annotation/lablog
index 063393c2..e771e6dc 100644
--- a/bu_isciii/templates/exometrio/ANALYSIS/ANALYSIS01_EXOME/03-annotation/lablog
+++ b/bu_isciii/templates/exometrio/ANALYSIS/ANALYSIS01_EXOME/03-annotation/lablog
@@ -28,7 +28,7 @@ echo "srun --partition short_idx --time 2:00:00 --chdir ${scratch_dir} --output
# 3. Lablog for annotating whole genome samples using Variant Effect Predictor (VEP).
# Run Vep without the plugin columns
-echo "srun --partition short_idx --mem 100G --time 12:00:00 --chdir ${scratch_dir} --output logs/VEP.log --job-name VEP singularity exec -B ${scratch_dir}/../../../ /data/bi/pipelines/singularity-images/ensembl-vep:103.1--pl5262h4a94de4_2 vep --fasta /data/bi/references/eukaria/homo_sapiens/hg19/1000genomes_b37/genome/human_g1k_v37.fasta -i ${scratch_dir}/vep/variants_fil_mod.vcf -o ${scratch_dir}/vep/vep_annot.vcf --cache --offline --dir_cache /data/bi/references/eukaria/homo_sapiens/cache_vep/ --everything --assembly GRCh37 --tab &" > _02_vep_annotation.sh
+echo "srun --partition short_idx --mem 100G --time 12:00:00 --chdir ${scratch_dir} --output logs/VEP.log --job-name VEP singularity exec -B /data/bi/references/eukaria/homo_sapiens/hg19/1000genomes_b37/genome -B /data/bi/references/eukaria/homo_sapiens/cache_vep -B ${scratch_dir}/../../../ /data/bi/pipelines/singularity-images/ensembl-vep:103.1--pl5262h4a94de4_2 vep --fasta /data/bi/references/eukaria/homo_sapiens/hg19/1000genomes_b37/genome/human_g1k_v37.fasta -i ${scratch_dir}/vep/variants_fil_mod.vcf -o ${scratch_dir}/vep/vep_annot.vcf --cache --offline --dir_cache /data/bi/references/eukaria/homo_sapiens/cache_vep/ --everything --assembly GRCh37 --tab &" > _02_vep_annotation.sh
#--------------------------------------------------------------------------------------------------------------------
diff --git a/bu_isciii/templates/mtbseq/ANALYSIS/ANALYSIS01_MTBSEQ/03-MTBSeq/lablog b/bu_isciii/templates/mtbseq/ANALYSIS/ANALYSIS01_MTBSEQ/03-MTBSeq/lablog
index e5bdac16..0eff7900 100644
--- a/bu_isciii/templates/mtbseq/ANALYSIS/ANALYSIS01_MTBSEQ/03-MTBSeq/lablog
+++ b/bu_isciii/templates/mtbseq/ANALYSIS/ANALYSIS01_MTBSEQ/03-MTBSeq/lablog
@@ -13,9 +13,9 @@ cat ../samples_id.txt | xargs -I @@ echo -e "srun --job-name MTBSEQ.@@ --output
# classification
echo "mkdir classification_all" > _03_gather_results.sh
-echo "FIRST_SAMPLE=$(head -n1 ../samples_id.txt); head -n 1 ${FIRST_SAMPLE}/Classification/Strain_Classification.tab > classification_all/strain_classification_all.tab; grep \"^'$analysis_year\" */Classification/Strain_Classification.tab | cut -d \":\" -f 2 >> classification_all/strain_classification_all.tab" >> _03_gather_results.sh
+echo 'FIRST_SAMPLE=$(head -n1 ../samples_id.txt); head -n 1 ${FIRST_SAMPLE}/Classification/Strain_Classification.tab > classification_all/strain_classification_all.tab; grep "^'\'''"$analysis_year"'" */Classification/Strain_Classification.tab | cut -d ":" -f 2 >> classification_all/strain_classification_all.tab' >> _03_gather_results.sh
# resistances
echo "mkdir resistances_all" >> _03_gather_results.sh
cat ../samples_id.txt | xargs -I % echo "cp %/Amend/NONE_joint_cf4_cr4_fr75_ph4_samples1_amended.tab resistances_all/%_var_res.tab" >> _03_gather_results.sh
# stats
-echo "mkdir stats_all; FIRST_SAMPLE=$(head -n1 ../samples_id.txt); head -n 1 ${FIRST_SAMPLE}/Statistics/Mapping_and_Variant_Statistics.tab > stats_all/statistics_all.tab; grep \"^'$analysis_year\" */Statistics/Mapping_and_Variant_Statistics.tab | cut -d \":\" -f 2 >> stats_all/statistics_all.tab" >> _03_gather_results.sh
+echo 'mkdir stats_all; FIRST_SAMPLE=$(head -n1 ../samples_id.txt); head -n 1 ${FIRST_SAMPLE}/Statistics/Mapping_and_Variant_Statistics.tab > stats_all/statistics_all.tab; grep "^'\'''"$analysis_year"'" */Statistics/Mapping_and_Variant_Statistics.tab | cut -d ":" -f 2 >> stats_all/statistics_all.tab' >> _03_gather_results.sh
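
The fix in this lablog is a quoting change: the old double-quoted echo let the shell expand $(head -n1 ../samples_id.txt) and ${FIRST_SAMPLE} while the lablog itself was running (when FIRST_SAMPLE is still unset), whereas the single-quoted form writes those expressions literally into _03_gather_results.sh and only splices in "$analysis_year" at lablog time. A rough Python sketch of the same write-now/expand-later idea (the values and file handling here are illustrative, not the template's code):

```python
# Sketch of the deferred-expansion pattern above: $(...) and ${FIRST_SAMPLE}
# are written verbatim into the generated script, while analysis_year is
# interpolated now. Values here are illustrative, not the template's.
analysis_year = "2024"  # in the lablog this comes from the calling environment

gather_cmd = (
    "FIRST_SAMPLE=$(head -n1 ../samples_id.txt); "
    "head -n 1 ${FIRST_SAMPLE}/Classification/Strain_Classification.tab "
    "> classification_all/strain_classification_all.tab; "
    f"grep \"^'{analysis_year}\" */Classification/Strain_Classification.tab "
    '| cut -d ":" -f 2 >> classification_all/strain_classification_all.tab'
)

with open("_03_gather_results.sh", "a") as handle:
    handle.write(gather_cmd + "\n")
```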
diff --git a/bu_isciii/templates/sftp_user.json b/bu_isciii/templates/sftp_user.json
index bc7f4528..344c8b49 100755
--- a/bu_isciii/templates/sftp_user.json
+++ b/bu_isciii/templates/sftp_user.json
@@ -44,7 +44,7 @@
"sara.perez": ["GeneticDiagnosis"],
"sbarturen": ["Labvirushep"],
"sergio.sanchez": ["LabFWDB_ssanchez"],
- "sherrera": ["LabFWBD", "LabFWBD_ext"],
+ "sherrera": ["LabFWBD", "LabFWBD_ext", "Labtuberculosis"],
"sresino": ["Labvirushep"],
"svaldezate": ["Labtaxonomia"],
"svazquez": ["Labvirusres"],
diff --git a/bu_isciii/templates/viralrecon/RESULTS/excel_generator.py b/bu_isciii/templates/viralrecon/RESULTS/excel_generator.py
index 8467c414..2cdb83b4 100755
--- a/bu_isciii/templates/viralrecon/RESULTS/excel_generator.py
+++ b/bu_isciii/templates/viralrecon/RESULTS/excel_generator.py
@@ -68,7 +68,9 @@ def merge_lineage_tables(
csvs_in_folder=csvs_in_folder, merged_csv_name=merged_csv_name
)
else:
- print(f"No pangolin folder could be found for {ref}, omitting")
+ print(
+ f"\033[93mNo pangolin folder could be found for {ref}, omitting\033[0m"
+ )
if os.path.isdir(os.path.abspath(folder + "/nextclade")):
nextcl_dir = os.path.join(folder, "nextclade")
@@ -82,17 +84,20 @@ def merge_lineage_tables(
csvs_in_folder=csvs_in_folder, merged_csv_name=merged_csv_name
)
else:
- print(f"No nextclade folder could be found for {ref}, omitting")
+ print(
+ f"\033[93mNo nextclade folder could be found for {ref}, omitting\033[0m\n"
+ )
return
def excel_generator(csv_files: List[str]):
+ # print("Proceeding")
for file in csv_files:
if not os.path.exists(file):
- print(f"File {file} does not exist, omitting...")
+ print(f"\033[91mFile {file} does not exist, omitting...\033[0m")
continue
- print(f"Generating excel file for {file}")
+ print(f"\033[92mGenerating excel file for {file}\033[0m")
output_name = os.path.splitext(os.path.basename(file))[0] + ".xlsx"
# workbook = openpyxl.Workbook(output_name)
if "nextclade" in str(file):
@@ -108,7 +113,7 @@ def excel_generator(csv_files: List[str]):
try:
table = pd.read_csv(file)
except pd.errors.EmptyDataError:
- print("Could not parse table from ", str(file))
+ print("\033[91mCould not parse table from ", str(file), "\033[0m")
continue
table = table.drop(["index"], axis=1, errors="ignore")
table.to_excel(output_name, index=False)
@@ -119,22 +124,26 @@ def single_csv_to_excel(csv_file: str):
try:
excel_generator([csv_file])
except FileNotFoundError as e:
- print(f"Could not find file {e}")
+ print(f"\033[91mCould not find file {e}\033[0m")
def main(args):
if args.single_csv:
# If single_csv is called, just convert target csv to excel and skip the rest
- print("Single file convertion selected. Skipping main process...")
+ print(
+ "\033[92mSingle file convertion selected. Skipping main process...\033[0m"
+ )
single_csv_to_excel(args.single_csv)
exit(0)
print(
- "Extracting references used for analysis and the samples associated with each reference\n"
+ "\033[92mExtracting references used for analysis and the samples associated with each reference\033[0m"
)
with open(args.reference_file, "r") as file:
references = [line.rstrip() for line in file]
- print(f"\nFound {len(references)} references: {str(references).strip('[]')}")
+ print(
+ f"\n\033[92mFound {len(references)} references: {str(references).strip('[]')}\033[0m"
+ )
reference_folders = {ref: str("excel_files_" + ref) for ref in references}
samples_ref_files = {
@@ -145,7 +154,7 @@ def main(args):
# Merge pangolin and nextclade csv files separatedly and create excel files for them
merge_lineage_tables(reference_folders, samples_ref_files)
for reference, folder in reference_folders.items():
- print(f"Creating excel files for reference {reference}")
+ print(f"\033[92mCreating excel files for reference {reference}\033[0m")
csv_files = [
file.path for file in os.scandir(folder) if file.path.endswith(".csv")
]
@@ -159,9 +168,14 @@ def main(args):
concat_tables_and_write(
csvs_in_folder=variants_tables, merged_csv_name="variants_long_table.csv"
)
- except FileNotFoundError as e:
- print(str(e))
- print("Merged variants_long_table.csv might be empty")
+ except FileNotFoundError:
+ print("\033[93mWARNING!\033[0m")
+ print(
+ "\033[93mAt least one variants_long_table.csv file could not be found. Therefore, merged variants_long_table.csv will be incomplete.\033[0m"
+ )
+ print(
+ "\033[93mPlease, check the following report in order to know which links are broken and, therefore, which tables could not be found:\033[0m\n"
+ )
# Create excel files for individual tables
valid_extensions = [".csv", ".tsv", ".tab"]
@@ -173,6 +187,16 @@ def main(args):
link_csvs = [file for file in rest_of_csvs if os.path.islink(file)]
broken_links = [file for file in link_csvs if not os.path.exists(os.readlink(file))]
valid_csvs = [file for file in rest_of_csvs if file not in broken_links]
+
+ if broken_links:
+ print(
+ f"\033[93mWARNING! {len(broken_links)} broken links found (for .csv, .tsv or .tab files). Please fix them.\033[0m"
+ )
+ for broken_link in broken_links:
+ print(
+ f"\033[93mBroken link: {broken_link} (target: {os.readlink(broken_link)})\033[0m"
+ )
+
excel_generator(valid_csvs)
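
The new warning block relies on separating usable tables from symlinks whose targets no longer exist. A self-contained sketch of that check is below; scan_for_broken_links and the example folder name are hypothetical, not part of excel_generator.py, and os.path.exists() is used because it already follows symlinks, which matches the intent of the os.readlink check in the script.

```python
# Hypothetical helper illustrating the broken-symlink filtering used above;
# the function name and example folder are not part of excel_generator.py.
import os
from typing import List, Tuple

VALID_EXTENSIONS = (".csv", ".tsv", ".tab")


def scan_for_broken_links(folder: str) -> Tuple[List[str], List[str]]:
    """Split table files in `folder` into usable files and broken symlinks."""
    candidates = [
        entry.path for entry in os.scandir(folder) if entry.path.endswith(VALID_EXTENSIONS)
    ]
    links = [path for path in candidates if os.path.islink(path)]
    # os.path.exists() follows the link, so a dangling symlink reports False.
    broken = [path for path in links if not os.path.exists(path)]
    valid = [path for path in candidates if path not in broken]
    return valid, broken


valid_csvs, broken_links = scan_for_broken_links("excel_files_EXAMPLE_REF")
for link in broken_links:
    print(f"Broken link: {link} (target: {os.readlink(link)})")
```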
diff --git a/bu_isciii/templates/wgstrio/ANALYSIS/ANALYSIS01_GENOME/03-annotation/lablog b/bu_isciii/templates/wgstrio/ANALYSIS/ANALYSIS01_GENOME/03-annotation/lablog
index 50d87f79..c6a964bd 100644
--- a/bu_isciii/templates/wgstrio/ANALYSIS/ANALYSIS01_GENOME/03-annotation/lablog
+++ b/bu_isciii/templates/wgstrio/ANALYSIS/ANALYSIS01_GENOME/03-annotation/lablog
@@ -31,7 +31,7 @@ echo "srun --partition short_idx --time 2:00:00 --chdir ${scratch_dir} --output
# Run Vep without the plugin columns
-echo "srun --partition short_idx --mem 100G --time 12:00:00 --chdir ${scratch_dir} --output logs/VEP.log --job-name VEP singularity exec -B ${scratch_dir}/../../../ /data/bi/pipelines/singularity-images/ensembl-vep:103.1--pl5262h4a94de4_2 vep --fasta /data/bi/references/eukaria/homo_sapiens/hg19/1000genomes_b37/genome/human_g1k_v37.fasta -i ${scratch_dir}/vep/variants_fil_mod.vcf -o ${scratch_dir}/vep/vep_annot.vcf --cache --offline --dir_cache /data/bi/references/eukaria/homo_sapiens/cache_vep/ --everything --assembly GRCh37 --tab &" > _02_vep_annotation.sh
+echo "srun --partition short_idx --mem 100G --time 12:00:00 --chdir ${scratch_dir} --output logs/VEP.log --job-name VEP singularity exec -B /data/bi/references/eukaria/homo_sapiens/hg19/1000genomes_b37/genome/ -B /data/bi/references/eukaria/homo_sapiens/cache_vep/ -B ${scratch_dir}/../../../ /data/bi/pipelines/singularity-images/ensembl-vep:103.1--pl5262h4a94de4_2 vep --fasta /data/bi/references/eukaria/homo_sapiens/hg19/1000genomes_b37/genome/human_g1k_v37.fasta -i ${scratch_dir}/vep/variants_fil_mod.vcf -o ${scratch_dir}/vep/vep_annot.vcf --cache --offline --dir_cache /data/bi/references/eukaria/homo_sapiens/cache_vep/ --everything --assembly GRCh37 --tab &" > _02_vep_annotation.sh
#--------------------------------------------------------------------------------------------------------------------
diff --git a/setup.py b/setup.py
index efa93a59..50fdff3d 100755
--- a/setup.py
+++ b/setup.py
@@ -2,7 +2,7 @@
from setuptools import setup, find_packages
-version = "2.2.0"
+version = "2.2.1"
with open("README.md") as f:
readme = f.read()