Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Build arm #14

Open
wants to merge 15 commits into
base: master
Choose a base branch
from
20 changes: 20 additions & 0 deletions biotools_cleaner/cleaner.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
#!/usr/bin/env python

import argparse
import logging

from tool import Tool

# Emit INFO-level messages so each processed/moved file is reported.
logging.basicConfig()
logging.root.setLevel(logging.INFO)

if __name__ == '__main__':
    # Local import: only the script entry point needs it.
    import sys

    # Command line: convert a single yaml file into the per-tool layout.
    parser = argparse.ArgumentParser()
    parser.add_argument('input', type=str, help="Path to yaml file")
    parser.add_argument('output', type=str, help="Output dir (ie, Research-software-ecosystem repository)")
    parser.add_argument('--dry-run', action='store_true', help="Dry run")
    parser.add_argument('--cleanup', action='store_true', help="Remove old layout files from repository")
    args = parser.parse_args()

    tool = Tool(args.input)
    # write_yaml returns False when the file has no usable "softwares" entry
    # or mixes several distinct biotools ids; surface that as a non-zero exit
    # status instead of silently reporting success.
    succeeded = tool.write_yaml(args.output, dry_run=args.dry_run, remove_input=args.cleanup)
    sys.exit(0 if succeeded else 1)
23 changes: 23 additions & 0 deletions biotools_cleaner/cleaner_batch.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
#!/usr/bin/env python

import argparse
import pathlib
import logging

from tool import Tool

logging.basicConfig()
# Root level is left at the logging default here; uncomment to also see the
# per-file INFO messages.
# logging.root.setLevel(logging.INFO)

if __name__ == '__main__':
    # Command line: convert every biocontainers.yaml found under a data folder.
    parser = argparse.ArgumentParser()
    parser.add_argument('rse_repo', type=str, help="Research-software-ecosystem data folder")
    parser.add_argument('--dry-run', action='store_true', help="Dry run")
    parser.add_argument('--cleanup', action='store_true', help="Remove old layout files from repository")
    parser.add_argument('--add-label', action='store_true', help="Make sure all tools in a specific file have the same biotool label")

    args = parser.parse_args()

    # Take a snapshot of the files to process before touching anything:
    # write_yaml creates directories and may delete files inside the very
    # tree being walked, and rglob() is a lazy generator.
    legacy_files = sorted(pathlib.Path(args.rse_repo).rglob("biocontainers.yaml"))

    failed = []
    for path in legacy_files:
        tool = Tool(str(path.resolve()))
        converted = tool.write_yaml(
            args.rse_repo,
            dry_run=args.dry_run,
            remove_input=args.cleanup,
            add_biotool=args.add_label,
        )
        if not converted:
            failed.append(str(path))

    # One failing file does not stop the batch, but it should not go unnoticed.
    if failed:
        logging.warning("%d file(s) could not be converted: %s", len(failed), ", ".join(failed))
77 changes: 77 additions & 0 deletions biotools_cleaner/tool.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
from collections import defaultdict
import os
import pathlib
import logging
from yaml import safe_load, dump


class Tool:
    """A yaml file of "softwares" entries that can be split into per-tool files.

    The file is expected to hold a top-level ``softwares`` mapping of
    ``tool name -> description``. Each description may carry a ``labels``
    mapping whose ``extra.identifiers.biotools`` entry names the bio.tools id
    the tool belongs to.
    """

    # Label holding the bio.tools identifier of a software entry.
    _BIOTOOLS_LABEL = 'extra.identifiers.biotools'

    def __init__(self, tool_yaml):
        """Load ``tool_yaml`` (a path) into ``self.yaml_data``.

        Raises whatever ``open`` / ``safe_load`` raise for an unreadable or
        malformed file.
        """
        self.yaml_path = tool_yaml

        with open(tool_yaml, 'r') as f:
            # An empty document parses to None; normalise it to an empty
            # mapping so write_yaml can report it instead of crashing.
            self.yaml_data = safe_load(f) or {}

        logging.info('Processing %s', tool_yaml)

    @classmethod
    def _biotool_label(cls, soft):
        """Return the biotools id of one software entry, or '' when unset."""
        labels = soft.get('labels') or {}
        return labels.get(cls._BIOTOOLS_LABEL) or ''

    def _infer_missing_biotools(self, softwares):
        """Map unlabelled tool names to the biotools id they should inherit.

        Returns an empty dict when there is nothing to infer, and ``None``
        when the file mixes several distinct non-empty biotools ids.
        """
        biotools = set()
        unlabelled = set()
        for name, soft in softwares.items():
            label = self._biotool_label(soft)
            biotools.add(label)
            if not label:
                unlabelled.add(name)

        if len(biotools) <= 1:
            return {}

        if len(biotools) == 2 and '' in biotools:
            # Exactly one real id plus some unlabelled tools: assume the
            # unlabelled ones belong to that same id.
            assumed_biotool = next(b for b in biotools if b)
            logging.warning(
                "Both empty and non-empty biotool id in %s. Assuming they are the same",
                self.yaml_path,
            )
            logging.warning("Adding %s to biotool %s", unlabelled, assumed_biotool)
            return dict.fromkeys(unlabelled, assumed_biotool)

        logging.error("Multiple distinct biotools in %s: stopping", self.yaml_path)
        return None

    def write_yaml(self, output_dir, dry_run=False, remove_input=False, add_biotool=False):
        """Write one ``<tool>.biocontainers.yaml`` file per software entry.

        Files go to ``<output_dir>/<biotools id>/``; a tool without a
        biotools id uses its own name as directory.

        Args:
            output_dir: root directory receiving the per-tool files.
            dry_run: only log what would be done; nothing is written or removed.
            remove_input: delete the source file once everything is written.
            add_biotool: when a biotools id was inferred for an unlabelled
                tool, also store it in that tool's labels.

        Returns:
            True on success, False when ``softwares`` is missing/empty or the
            file mixes several distinct biotools ids.
        """
        softwares = self.yaml_data.get('softwares')
        if not softwares:
            logging.error('"softwares" key not found or empty')
            return False

        to_merge = self._infer_missing_biotools(softwares)
        if to_merge is None:
            return False

        # Group the entries by the directory (biotools id) they end up in.
        grouped = defaultdict(list)
        for tool_name, values in softwares.items():
            # An empty label is treated like a missing one, otherwise the
            # output directory component would be empty.
            biotool_id = self._biotool_label(values) or tool_name

            if tool_name in to_merge:
                biotool_id = to_merge[tool_name]
                logging.warning("Assuming %s biotool id is %s", tool_name, biotool_id)
                if add_biotool:
                    logging.warning("Adding biotool label")
                    if not isinstance(values.get('labels'), dict):
                        values['labels'] = {}
                    values['labels'][self._BIOTOOLS_LABEL] = biotool_id

            grouped[biotool_id].append((tool_name, values))

        for biotool_id, entries in grouped.items():
            target_dir = os.path.join(output_dir, biotool_id)
            action = "Moving" if len(entries) == 1 else "Splitting"
            for tool_name, values in entries:
                output_path = os.path.join(target_dir, '{}.biocontainers.yaml'.format(tool_name))
                logging.info("%s %s to %s", action, self.yaml_path, output_path)

                if dry_run:
                    continue

                pathlib.Path(target_dir).mkdir(parents=True, exist_ok=True)
                with open(output_path, 'w') as f:
                    # Write only this tool's entry, not the whole source
                    # document, so the split is effective.
                    dump({"softwares": {tool_name: values}}, f)

        # A dry run must leave the repository untouched, input file included.
        if remove_input and not dry_run:
            logging.info("Removing %s", self.yaml_path)
            os.remove(self.yaml_path)
        return True
92 changes: 44 additions & 48 deletions github-ci/src/biocontainersci/biotools.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,21 +5,20 @@

import shutil
import requests
import sys
import os
import logging
import re
import git
import datetime
from copy import deepcopy
from yaml import load, dump
from yaml import dump
try:
from yaml import CLoader as Loader, CDumper as Dumper
from yaml import CDumper as Dumper
except ImportError:
from yaml import Loader, Dumper
from yaml import Dumper

from biocontainersci.utils import BiocontainersCIException


class Biotools:

GIT_REPO = '[email protected]:bio-tools/content.git'
Expand Down Expand Up @@ -90,15 +89,15 @@ def create_pr(self, branch):
}
github_url = 'https://api.github.com/repos/%s/pulls' % ("bio-tools/content")
res = requests.post(
github_url,
json={
'title': "biocontainers-bot metadata import PR",
'head': branch,
"base": "master"
},
headers=headers
github_url,
json={
'title': "biocontainers-bot metadata import PR",
'head': branch,
"base": "master"
},
headers=headers
)
if not res.status_code in [200, 201]:
if res.status_code not in [200, 201]:
logging.error("[biotools] Failed to create pull request: %s", res.text)
return False
pr = res.json()
Expand All @@ -107,13 +106,13 @@ def create_pr(self, branch):
github_url = 'https://api.github.com/repos/%s/issues/%d' % ("bio-tools/content", issue)

res = requests.post(
github_url,
json={
'labels': [self.BOT_LABEL],
},
headers=headers
github_url,
json={
'labels': [self.BOT_LABEL],
},
headers=headers
)
if not res.status_code in [200]:
if res.status_code not in [200]:
logging.error("Failed to add issue label: %d" % res.status_code)

logging.info("Tagged issue: %d" % issue)
Expand Down Expand Up @@ -148,41 +147,34 @@ def run(self, f, labels, branch=None):
try:
(repo, branch) = self.repo_setup(branch)

tmpdir = self.REPO + '/data/'
dirname = tmpdir + name
biocontainers_file = tmpdir + name + '/biocontainers.yaml'
all_tmpdir = self.REPO + '/import/biocontainers/'
if not os.path.exists(all_tmpdir):
os.makedirs(all_tmpdir)
files_to_write = [all_tmpdir + '{}.biocontainers.yaml'.format(name)]
if biotools is not None:
dirname = tmpdir + biotools
biocontainers_file = tmpdir + biotools + '/biocontainers.yaml'

if not os.path.exists(dirname):
os.makedirs(dirname)
biotool_tmpdir = self.REPO + '/data/{}/'.format(biotools)
if not os.path.exists(biotool_tmpdir):
os.makedirs(biotool_tmpdir)
files_to_write.append(biotool_tmpdir + '{}.biocontainers.yaml'.format(name))

clabels = {}
for k, v in labels.items():
clabels[k] = v

data = {
'software': name,
'labels': deepcopy(clabels),
'versions': []
}
'software': name,
'labels': deepcopy(clabels),
'versions': []
}

softwares = {'softwares': {}}
softwares["softwares"][name] = data
if os.path.exists(biocontainers_file):
with open(biocontainers_file) as fp:
softwares = load(fp, Loader=Loader)

if name not in softwares["softwares"]:
softwares["softwares"][name] = data
for file_path in files_to_write:

exists = False
for download in softwares["softwares"][name]["versions"]:
if download["version"] == container_version:
exists = True
break
if name not in softwares["softwares"]:
softwares["softwares"][name] = data

if not exists:
new_download = {
"url": "biocontainers/" + name + ":" + container_version,
"version": container_version,
Expand All @@ -191,14 +183,18 @@ def run(self, f, labels, branch=None):
}
softwares["softwares"][name]["versions"].append(new_download)

with open(biocontainers_file, 'w') as fp:
with open(file_path, 'w') as fp:
dump(softwares, fp, Dumper=Dumper)

repo.index.add([biocontainers_file])
if biotools is not None:
repo.index.commit("Add version for %s:%s" % (biotools, container_version))
else:
repo.index.commit("Add version for %s:%s" % (name, container_version))
changed = False
changed_files = [item.a_path for item in repo.index.diff(None)]
for file_path in files_to_write:
if file_path in changed_files:
repo.index.add([file_path])
changed = True

if changed:
repo.index.commit("Add version for %s:%s" % (name, container_version))
try:
logging.info("[biotools] Push to branch %s" % branch)

Expand Down
Loading