-
Notifications
You must be signed in to change notification settings - Fork 13
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #203 from rhysnewell/dev
merging v0.9.1
- Loading branch information
Showing
13 changed files
with
247 additions
and
104 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1,25 @@ | ||
__version__ = "0.9.1"


# CONSTANTS
# Identifiers recognised as long-read types.
# NOTE(review): presumably ont/ont_hq are Nanopore and rs/sq/ccs/hifi are PacBio — confirm.
LONG_READ_TYPES = ["ont", "ont_hq", "rs", "sq", "ccs", "hifi"]

# Known model names; presumably the values accepted for a medaka model option — confirm against callers.
MEDAKA_MODELS = [
    "r103_fast_g507", "r103_fast_snp_g507", "r103_fast_variant_g507", "r103_hac_g507", "r103_hac_snp_g507",
    "r103_hac_variant_g507", "r103_min_high_g345", "r103_min_high_g360", "r103_prom_high_g360", "r103_prom_snp_g3210",
    "r103_prom_variant_g3210", "r103_sup_g507", "r103_sup_snp_g507", "r103_sup_variant_g507", "r1041_e82_260bps_fast_g632",
    "r1041_e82_260bps_fast_variant_g632", "r1041_e82_260bps_hac_g632", "r1041_e82_260bps_hac_variant_g632", "r1041_e82_260bps_sup_g632",
    "r1041_e82_260bps_sup_variant_g632", "r1041_e82_400bps_fast_g615", "r1041_e82_400bps_fast_g632",
    "r1041_e82_400bps_fast_variant_g615", "r1041_e82_400bps_fast_variant_g632", "r1041_e82_400bps_hac_g615",
    "r1041_e82_400bps_hac_g632", "r1041_e82_400bps_hac_variant_g615", "r1041_e82_400bps_hac_variant_g632", "r1041_e82_400bps_sup_g615",
    "r1041_e82_400bps_sup_variant_g615", "r104_e81_fast_g5015", "r104_e81_fast_variant_g5015", "r104_e81_hac_g5015",
    "r104_e81_hac_variant_g5015", "r104_e81_sup_g5015", "r104_e81_sup_g610", "r104_e81_sup_variant_g610", "r10_min_high_g303",
    "r10_min_high_g340", "r941_e81_fast_g514", "r941_e81_fast_variant_g514", "r941_e81_hac_g514", "r941_e81_hac_variant_g514",
    "r941_e81_sup_g514", "r941_e81_sup_variant_g514", "r941_min_fast_g303", "r941_min_fast_g507", "r941_min_fast_snp_g507",
    "r941_min_fast_variant_g507", "r941_min_hac_g507", "r941_min_hac_snp_g507", "r941_min_hac_variant_g507", "r941_min_high_g303",
    "r941_min_high_g330", "r941_min_high_g340_rle", "r941_min_high_g344", "r941_min_high_g351", "r941_min_high_g360", "r941_min_sup_g507",
    "r941_min_sup_snp_g507", "r941_min_sup_variant_g507", "r941_prom_fast_g303", "r941_prom_fast_g507", "r941_prom_fast_snp_g507",
    "r941_prom_fast_variant_g507", "r941_prom_hac_g507", "r941_prom_hac_snp_g507", "r941_prom_hac_variant_g507", "r941_prom_high_g303",
    "r941_prom_high_g330", "r941_prom_high_g344", "r941_prom_high_g360", "r941_prom_high_g4011", "r941_prom_snp_g303", "r941_prom_snp_g322",
    "r941_prom_snp_g360", "r941_prom_sup_g507", "r941_prom_sup_snp_g507", "r941_prom_sup_variant_g507", "r941_prom_variant_g303",
    "r941_prom_variant_g322", "r941_prom_variant_g360", "r941_sup_plant_g610", "r941_sup_plant_variant_g610"
]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,89 @@ | ||
import subprocess | ||
import os | ||
import sys | ||
from typing import List | ||
|
||
def _format_kmer_sizes(kmer_sizes) -> str:
    '''Return kmer sizes as one comma-separated token suitable for ``spades.py -k``.'''
    if isinstance(kmer_sizes, str):
        # Accept "auto", "21,33" or whitespace-separated values such as "21 33".
        tokens = kmer_sizes.replace(",", " ").split()
    elif isinstance(kmer_sizes, int):
        tokens = [str(kmer_sizes)]
    else:
        tokens = [str(k) for k in kmer_sizes]
    return ",".join(tokens)


def _run_spades_and_collect(command, logf):
    '''Log and run a spades command, then copy its scaffolds to the assembly fasta.'''
    logf.write(f"Queueing command {' '.join(command)}\n")
    # Flush so our message is not written after the child's own output.
    logf.flush()
    subprocess.run(command, stdout=logf, stderr=subprocess.STDOUT)
    subprocess.run(
        ["cp", "data/spades_assembly/scaffolds.fasta", "data/spades_assembly.fasta"],
        stdout=logf,
        stderr=subprocess.STDOUT,
    )


def spades_asssembly(
    input_fastq: str,
    input_long_reads: str,
    output_fasta: str,
    output_spades_folder: str,
    max_memory: int,
    threads: int,
    kmer_sizes: List[int],
    tmp_dir: str,
    long_read_type: str,
    log: str):
    '''
    Assemble short reads and long reads (if any) using spades

    Three outcomes are possible:
      * ``data/spades_assembly`` already exists: the previous run is resumed
        with ``--restart-from last``.
      * otherwise, if ``data/short_reads.filt.fastq.gz`` is at least 500000
        bytes: a new ``--meta`` hybrid assembly is started.
      * otherwise: spades is not run; an empty ``output_spades_folder`` and an
        empty ``output_fasta`` are created instead.

    NOTE: the spades working directory, the size-checked short read file and
    the copied assembly fasta are hard-coded under ``data/``; only the
    placeholder branch uses ``output_spades_folder`` / ``output_fasta``.
    The exit status of spades and of the copy step is not checked.

    :param input_fastq: short reads fastq file (passed to spades via ``--12``)
    :param input_long_reads: long reads fastq file
    :param output_fasta: output fasta file
    :param output_spades_folder: output spades folder
    :param max_memory: maximum memory to use (passed to ``--memory``)
    :param threads: number of threads
    :param kmer_sizes: kmer sizes; a list, or a string such as "auto" or "21,33"
    :param tmp_dir: temporary directory; falls back to $TMPDIR when empty
    :param long_read_type: type of long reads; "ont"/"ont_hq" use ``--nanopore``,
        anything else uses ``--pacbio``
    :param log: log file (appended to)
    :return: None
    :raises OSError: if ``data/short_reads.filt.fastq.gz`` cannot be stat'ed
    '''
    spades_dir = "data/spades_assembly"
    spades_tmp = "data/spades_assembly/tmp"
    short_reads = "data/short_reads.filt.fastq.gz"
    minimum_size = 500000

    # Prefer the explicit tmp_dir, then $TMPDIR; omit the flag if neither is set.
    tmp_dir = tmp_dir or os.environ.get("TMPDIR")
    tmp_dir_args = ["--tmp-dir", tmp_dir] if tmp_dir else []

    kmers = _format_kmer_sizes(kmer_sizes)
    kmer_args = ["-k", kmers] if kmers else []

    with open(log, 'a') as logf:
        # remove existing temporary directory left by a previous run
        if os.path.exists(spades_tmp):
            subprocess.run(["rm", "-rf", spades_tmp], stdout=logf, stderr=subprocess.STDOUT)

        actual_size = os.path.getsize(short_reads)

        if os.path.exists(spades_dir):
            # resume previous assembly
            command = [
                "spades.py", "--restart-from", "last",
                "--memory", str(max_memory),
                "-t", str(threads),
                "-o", spades_dir,
                *kmer_args,
                *tmp_dir_args,
            ]
            _run_spades_and_collect(command, logf)
        elif actual_size >= minimum_size:
            long_read_flag = "--nanopore" if long_read_type in ("ont", "ont_hq") else "--pacbio"
            command = [
                "spades.py", "--checkpoints", "all",
                "--memory", str(max_memory),
                "--meta",
                long_read_flag, input_long_reads,
                "--12", input_fastq,
                "-o", spades_dir,
                "-t", str(threads),
                *kmer_args,
                *tmp_dir_args,
            ]
            _run_spades_and_collect(command, logf)
        else:
            # Too few short reads to assemble: create empty placeholder outputs.
            logf.write(
                f"Short read file is {actual_size} bytes (< {minimum_size}); "
                f"skipping spades and creating empty outputs\n"
            )
            os.makedirs(output_spades_folder, exist_ok=True)
            with open(output_fasta, 'a'):
                pass
            os.utime(output_fasta, None)
|
||
|
||
if __name__ == '__main__':
    # NOTE(review): `snakemake` is not defined in this file; presumably it is
    # injected by Snakemake when this is run as a rule script — confirm.
    log = snakemake.log[0]

    # Create the log file, or empty it if it already exists, before the
    # assembly step starts appending to it.
    open(log, 'w').close()

    spades_asssembly(
        input_fastq=snakemake.input.fastq,
        input_long_reads=snakemake.input.long_reads,
        output_fasta=snakemake.output.fasta,
        output_spades_folder=snakemake.output.spades_folder,
        max_memory=snakemake.params.max_memory,
        threads=snakemake.threads,
        kmer_sizes=snakemake.params.kmer_sizes,
        tmp_dir=snakemake.params.tmpdir,
        long_read_type=snakemake.params.long_read_type,
        log=log,
    )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.