Skip to content

Commit

Permalink
cleanup semibin_model handling
Browse files Browse the repository at this point in the history
  • Loading branch information
AroneyS committed Nov 28, 2024
1 parent 14634a3 commit fc4e84d
Show file tree
Hide file tree
Showing 2 changed files with 52 additions and 3 deletions.
13 changes: 10 additions & 3 deletions aviary/modules/binning/binning.smk
Original file line number Diff line number Diff line change
Expand Up @@ -371,7 +371,7 @@ rule semibin:
bams_indexed = ancient("data/binning_bams/done")
params:
# Can't use premade model with multiple samples, so disregard if provided
semibin_model = config['semibin_model']
semibin_model = f"--environment {config['semibin_model']} " if len(config["short_reads_1"]) == 1 else ""
output:
done = "data/semibin_bins/done"
threads:
Expand All @@ -389,8 +389,15 @@ rule semibin:
shell:
"rm -rf data/semibin_bins/; "
"mkdir -p data/semibin_bins/output_bins/; "
"SemiBin2 single_easy_bin -i {input.fasta} -b data/binning_bams/*.bam -o data/semibin_bins --environment {params.semibin_model} -p {threads} --self-supervised --compression none > {log} 2>&1 && "
"touch {output.done} || SemiBin2 single_easy_bin -i {input.fasta} -b data/binning_bams/*.bam -o data/semibin_bins -p {threads} --self-supervised --compression none > {log} 2>&1 "
"SemiBin2 single_easy_bin "
"-i {input.fasta} "
"-b data/binning_bams/*.bam "
"-o data/semibin_bins "
"{params.semibin_model} "
"-p {threads} "
"--self-supervised "
"--compression none "
"> {log} 2>&1 "
"&& touch {output.done} || touch {output.done}"

rule checkm_rosella:
Expand Down
42 changes: 42 additions & 0 deletions test/test_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,48 @@ def test_short_read_recovery_fast(self):
num_lines = sum(1 for _ in f)
self.assertEqual(num_lines, 3)

semibin_log_path = f"{output_dir}/aviary_out/logs/semibin.log"
self.assertTrue(os.path.isfile(semibin_log_path))
with open(semibin_log_path) as f:
log = f.read()
self.assertTrue("Training model..." not in log)

self.assertFalse(os.path.isfile(f"{output_dir}/aviary_out/data/final_contigs.fasta"))

def test_short_read_recovery_semibin(self):
    """Run a SemiBin-only `aviary recover` on two short-read samples.

    The second sample is a symlink to the first read pair under a different
    name, so the pipeline is handed two entries for both ``-1`` and ``-2``.
    The rosella, vamb and metabat binners are skipped.

    Checks that:
      * ``bins/bin_info.tsv`` exists and has exactly three lines,
      * ``logs/semibin.log`` exists and contains ``"Training model..."``
        (presumably because a pre-trained SemiBin environment model is not
        used when more than one sample is given -- confirm against the
        ``semibin`` rule),
      * ``data/final_contigs.fasta`` was not produced.
    """
    output_dir = os.path.join("example", "test_short_read_recovery_semibin")
    self.setup_output_dir(output_dir)

    # Fake a second sample by symlinking the first read pair under new names.
    cmd = f"ln -sr {data}/wgsim.1.fq.gz {output_dir}/wgsim2.1.fq.gz && ln -sr {data}/wgsim.2.fq.gz {output_dir}/wgsim2.2.fq.gz"
    subprocess.run(cmd, shell=True, check=True)

    cmd = (
        f"aviary recover "
        f"--assembly {data}/assembly.fasta "
        f"-o {output_dir}/aviary_out "
        f"-1 {data}/wgsim.1.fq.gz {output_dir}/wgsim2.1.fq.gz "
        f"-2 {data}/wgsim.2.fq.gz {output_dir}/wgsim2.2.fq.gz "
        f"--binning-only "
        f"--skip-binners rosella vamb metabat "
        f"--skip-qc "
        f"--refinery-max-iterations 0 "
        f"--conda-prefix {path_to_conda} "
        f"-n 32 -t 32 "
    )
    subprocess.run(cmd, shell=True, check=True)

    bin_info_path = f"{output_dir}/aviary_out/bins/bin_info.tsv"
    self.assertTrue(os.path.isfile(bin_info_path))
    with open(bin_info_path) as f:
        num_lines = sum(1 for _ in f)
    self.assertEqual(num_lines, 3)

    semibin_log_path = f"{output_dir}/aviary_out/logs/semibin.log"
    self.assertTrue(os.path.isfile(semibin_log_path))
    with open(semibin_log_path) as f:
        log = f.read()
    # assertIn (rather than assertTrue(... in ...)) so a failure reports the
    # missing text and the log contents instead of just "False is not true".
    self.assertIn("Training model...", log)

    self.assertFalse(os.path.isfile(f"{output_dir}/aviary_out/data/final_contigs.fasta"))

def test_short_read_recovery_vamb(self):
Expand Down

0 comments on commit fc4e84d

Please sign in to comment.