Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/master' into test
Browse files Browse the repository at this point in the history
  • Loading branch information
azat-badretdin committed Feb 9, 2022
2 parents 94c97cf + 1c1f5aa commit 09774c7
Show file tree
Hide file tree
Showing 2 changed files with 98 additions and 17 deletions.
2 changes: 1 addition & 1 deletion assemble.cwl
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
class: Workflow
cwlVersion: v1.2
doc: |
Assemble a set of reads using SKESA
Assemble a set of reads using SKESA
requirements:
- class: MultipleInputFeatureRequirement
inputs:
Expand Down
113 changes: 97 additions & 16 deletions scripts/pgap.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@

import argparse
import atexit
import contextlib
import glob
import json
import os
Expand All @@ -24,7 +25,9 @@
import threading
import time
import tempfile
import contextlib
import xml
import xml.dom.minidom


from io import open
from urllib.parse import urlparse, urlencode
Expand Down Expand Up @@ -165,7 +168,7 @@ def __init__(self, params, local_input, pipeline):
self.input_file = '/pgap/user_input/pgap_input.yaml'
submol = self.get_submol(local_input)
if ( submol != None ):
self.submol = self.create_submolfile(submol)
self.submol = self.create_submolfile(submol, params.ani_output, params.ani_hr_output, params.args.auto_correct_tax)
else:
self.submol = None
self.yaml = self.create_inputfile(local_input)
Expand Down Expand Up @@ -222,14 +225,17 @@ def make_docker_cmd(self):


def make_podman_cmd(self):
self.cmd = [self.params.docker_cmd, 'run', '-i', '--rm' ]
self.cmd = [self.params.docker_cmd]
if self.params.args.debug:
self.cmd.extend(['--log-level', 'debug'])
self.cmd.extend(['run', '-i', '--rm' ])

self.cmd.extend([
'--volume', '{}:/pgap/input:ro'.format(self.data_dir),
'--volume', '{}:/pgap/user_input'.format(self.input_dir),
'--volume', '{}:{}:ro'.format(self.yaml, self.input_file ),
'--volume', '{}:/pgap/input:ro,Z'.format(self.data_dir),
'--volume', '{}:/pgap/user_input:Z'.format(self.input_dir),
'--volume', '{}:{}:ro,Z'.format(self.yaml, self.input_file ),
'--volume', '{}:/tmp:rw'.format(os.getenv("TMPDIR", "/tmp")),
'--volume', '{}:/pgap/output:rw'.format(self.params.outputdir)])
'--volume', '{}:/pgap/output:rw,Z'.format(self.params.outputdir)])

if (self.params.args.memory):
self.cmd.extend(['--memory', self.params.args.memory])
Expand All @@ -239,6 +245,8 @@ def make_podman_cmd(self):
log_dir = self.params.outputdir + '/debug/log'
os.makedirs(log_dir, exist_ok=True)
self.cmd.extend(['--volume', '{}:/log/srv'.format(log_dir)])
if self.params.args.container_name:
self.cmd.extend(['--name', self.params.args.container_name])
self.cmd.append(self.params.docker_image)

def make_singularity_cmd(self):
Expand All @@ -256,7 +264,11 @@ def make_singularity_cmd(self):
log_dir = self.params.outputdir + '/debug/log'
os.makedirs(log_dir, exist_ok=True)
self.cmd.extend(['--bind', '{}:/log/srv'.format(log_dir)])
self.cmd.extend(["--pwd", "/pgap", "docker://" + self.params.docker_image])

if self.params.args.container_path:
self.cmd.extend(["--pwd", "/pgap", self.params.docker_image])
else:
self.cmd.extend(["--pwd", "/pgap", "docker://" + self.params.docker_image])

def get_submol(self, local_input):
with open(local_input, 'r') as fIn:
Expand All @@ -278,11 +290,35 @@ def regexp_file(self, filename, field):
for line in fIn:
if(re.search(field,line)):
return True
return False;

def create_submolfile(self, local_submol):
return False
def get_genus_species(self, xml_file):
genus_species = None
try:
doc = xml.dom.minidom.parse(xml_file)
predicted_taxid = doc.getElementsByTagName('predicted-taxid')[0]
if predicted_taxid.getAttribute('confidence')=='HIGH':
genus_species = predicted_taxid.getAttribute('org-name')
except:
genus_species = None
return genus_species

def create_submolfile(self, local_submol, ani_output, ani_hr_output, auto_correct_tax):
has_authors = self.regexp_file(local_submol, '^authors:')
has_contact_info = self.regexp_file(local_submol, '^contact_info:')
genus_species = None
if auto_correct_tax:
if ani_output != None:
genus_species = self.get_genus_species(ani_output)
if genus_species != None:
print('ANI analysis detected species "{}", and we will use it for PGAP'.format(genus_species))
else:
if ani_hr_output != None:
chosen_ani_output_type = ani_hr_output
else:
chosen_ani_output_type = ani_output
print('ERROR: taxcheck failed to assign a species with high confidence, thus PGAP will not execute. See {}'.format(chosen_ani_output_type))

sys.exit(1)
with tempfile.NamedTemporaryFile(mode='w',
suffix=".yaml",
prefix="pgap_submol_",
Expand All @@ -292,6 +328,10 @@ def create_submolfile(self, local_submol):
with open(local_submol, 'r') as fIn:
for line in fIn:
if line: # skip empty lines
if auto_correct_tax and genus_species != None and re.match(r'\s+genus_species:', line):
print('replacing organism in line: {}'.format(line.rstrip()))
line = re.sub(r'genus_species:.*', "genus_species: '{}'".format(genus_species), line)
print('with organism: {}'.format(genus_species))
fOut.write(line.rstrip())
fOut.write(u'\n')
if has_authors == False:
Expand Down Expand Up @@ -354,7 +394,8 @@ def check_runtime_setting(settings, value, min):
print('WARNING: {} is less than the recommended value of {}'.format(value, min))

if (self.params.docker_type == 'singularity'):
cmd = [self.params.docker_cmd, 'exec', '--bind', '{}:/cwd:ro'.format(os.getcwd()), "docker://"+self.params.docker_image,
singularity_docker_image = self.params.docker_image if self.params.args.container_path else "docker://"+self.params.docker_image
cmd = [self.params.docker_cmd, 'exec', '--bind', '{}:/cwd:ro'.format(os.getcwd()), singularity_docker_image,
'bash', '-c', 'df -k /cwd /tmp ; ulimit -a ; cat /proc/{meminfo,cpuinfo}']
else:
cmd = [self.params.docker_cmd, 'run', '-i', '-v', '{}:/cwd'.format(os.getcwd()), self.params.docker_image,
Expand Down Expand Up @@ -438,6 +479,9 @@ class Setup:

def __init__(self, args):
self.args = args
self.ani_output = None
# human readable ANI output file
self.ani_hr_output = None
self.branch = self.get_branch()
self.repo = self.get_repo()
self.rundir = self.get_dir()
Expand All @@ -455,7 +499,10 @@ def __init__(self, args):
self.list_remote_versions()
return
self.use_version = self.get_use_version()
self.docker_image = "ncbi/{}:{}".format(self.repo, self.use_version)
if self.args.container_path:
self.docker_image = self.args.container_path
else:
self.docker_image = "ncbi/{}:{}".format(self.repo, self.use_version)
self.data_path = '{}/input-{}'.format(self.rundir, self.use_version)
self.test_genomes_path = '{}/test_genomes-{}'.format(self.rundir, self.use_version)
self.outputdir = self.get_output_dir()
Expand Down Expand Up @@ -647,6 +694,7 @@ def check_install_data(self):
packages = ['pgap']
for package in packages:
guard_file = f"{self.rundir}/input-{self.use_version}/.{package}_complete"
return True # DEBUG only do not commit
if not os.path.isfile(guard_file):
return False
return True
Expand Down Expand Up @@ -776,6 +824,9 @@ def main():
parser.add_argument("--container-name",
dest='container_name',
help='Specify a container name that will be used instead of automatically generated.')
parser.add_argument("--container-path",
dest='container_path',
help='Override path to image.')
parser.add_argument("--ignore-all-errors",
dest='ignore_all_errors',
action='store_true',
Expand All @@ -784,6 +835,12 @@ def main():
dest='no_internet',
action='store_true',
help='Disable internet access for all programs in pipeline.')
parser.add_argument("--auto-correct-tax",
dest='auto_correct_tax',
action='store_true',
help='''
If flag is set, run ANI first, then PGAP, overriding the organism name provided by the user in the input YAML with the value returned by ANI Predicted organism. Obviously both actions need to be requested for this flag to take effect
''')
parser.add_argument('-D', '--docker', metavar='path',
help='Docker-compatible executable (e.g. docker, podman, singularity), which may include a full path like /usr/bin/docker')
parser.add_argument('-o', '--output', metavar='path', default='output',
Expand Down Expand Up @@ -814,12 +871,36 @@ def main():
retcode = p.launch()
p.cleaunup()
if args.ignore_all_errors == False:
# args.output for some reason not always available
time.sleep(1)
# analyze ani output here
if not os.path.exists(args.output):
raise
print("INTERNAL(SYSTEM)PROBLEM: abort: output directory does not exist: {}".format(args.output))
sys.exit(1)
params.ani_output = os.path.join(args.output, "ani-tax-report.xml")
params.ani_hr_output = os.path.join(args.output, "ani-tax-report.txt")
if os.path.exists(params.ani_output) and os.path.getsize(params.ani_output) > 0:
True
else:
params.ani_output = None
if os.path.exists(params.ani_hr_output) and os.path.getsize(params.ani_hr_output) > 0:
True
else:
params.ani_hr_output = None

errors_xml_fn = os.path.join(args.output, "errors.xml")
if os.path.exists(errors_xml_fn) and os.path.getsize(errors_xml_fn) > 0:
raise
# if there are errors
# and we do not want to recover them when it is recoverable
# then bail
if os.path.exists(errors_xml_fn) and os.path.getsize(errors_xml_fn) > 0 and not ( args.auto_correct_tax and params.ani_output != None ) :
error_file = None
if params.ani_output != None:
error_file = params.ani_output
else:
error_file = errors_xml_fn
print("ERROR: taxcheck calls the genome misassigned or contaminated, thus PGAP will not execute. See {}".format(error_file))
sys.exit(1)

if not args.ani_only:
p = Pipeline(params, args.input, "pgap")
retcode = p.launch()
Expand Down

0 comments on commit 09774c7

Please sign in to comment.