Skip to content

Commit

Permalink
Initial codemeta support
Browse files Browse the repository at this point in the history
  • Loading branch information
tmorrell committed Feb 5, 2018
1 parent 41a7db1 commit bdced82
Show file tree
Hide file tree
Showing 3 changed files with 88 additions and 2 deletions.
28 changes: 28 additions & 0 deletions codemeta.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
{
"@context": "https://doi.org/10.5063/schema/codemeta-2.0",
"@type": "SoftwareSourceCode",
"description": "AMES automates updating repository metadata.",
"name": "AMES: Automated Metadata Service",
"codeRepository": "https://github.com/caltechlibrary/ames",
"issueTracker": "https://github.com/caltechlibrary/ames/issues",
"license": "https://data.caltech.edu/license",
"version": "0.0.3",
"author": [
{
"@type": "Person",
"givenName": "Thomas E",
"familyName": "Morrell",
"affiliation": "Caltech Library",
"email": "[email protected]",
"@id": "https://orcid.org/0000-0001-9266-5146"
}],
"developmentStatus": "active",
"downloadUrl": "https://github.com/caltechlibrary/ames/archive/0.0.3.zip",
"keywords": [
"GitHub",
"metadata",
"software"
],
"maintainer": "https://orcid.org/0000-0001-9266-5146",
"programmingLanguage": "Python"
}
60 changes: 59 additions & 1 deletion matchers/caltechdata.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ def match_cd_refs():
keys =\
subprocess.check_output(["dataset","-c","s3://dataset.library.caltech.edu/CaltechDATA","keys"],universal_newlines=True).splitlines()
for k in keys:
print(k)
metadata =\
subprocess.check_output(["dataset","-c","s3://dataset.library.caltech.edu/CaltechDATA","read",k],universal_newlines=True)
metadata = json.loads(metadata)['metadata']
Expand Down Expand Up @@ -49,12 +50,69 @@ def match_cd_refs():
print(response)
return matches

def codemeta_to_datacite(metadata):
    """Translate a parsed codemeta record into a partial DataCite dict.

    Only two codemeta fields are handled:

    * ``author``  -> ``creators``: name parts, an optional name identifier
      taken from the last path segment of ``@id``, and an optional
      affiliation.
    * ``license`` -> ``rightsList``: the rights name is the last path
      segment of the license URI.

    Args:
        metadata: dict loaded from a codemeta.json file.

    Returns:
        A dict holding ``creators`` and/or ``rightsList`` when the matching
        codemeta keys are present; an empty dict otherwise.
    """
    datacite = {}
    if 'author' in metadata:
        authors = metadata['author']
        # JSON-LD permits a single object where a list is expected.
        if isinstance(authors, dict):
            authors = [authors]
        creators = []
        for a in authors:
            family = a.get('familyName')
            given = a.get('givenName')
            cre = {}
            if family and given:
                # Separator kept as a bare comma to match existing records.
                cre['creatorName'] = family + ',' + given
            else:
                # Fall back to a plain ``name`` (e.g. an Organization) or
                # whichever single name part is available.
                name = a.get('name') or family or given
                if not name:
                    # No usable name at all: nothing to build a creator from.
                    continue
                cre['creatorName'] = name
            if family:
                cre['familyName'] = family
            if given:
                cre['givenName'] = given
            if '@id' in a:
                # Strip a trailing slash so the last segment is never empty.
                idn = a['@id'].rstrip('/').split('/')[-1]
                # Should check for type and remove hard code URI
                cre['nameIdentifiers'] = [{
                    'nameIdentifier': idn,
                    'nameIdentifierScheme': 'ORCID',
                    'schemeURI': 'http://orcid.org',
                }]
            if 'affiliation' in a:
                # Should check if can support multiple affiliations
                cre['affiliations'] = [a['affiliation']]
            creators.append(cre)
        datacite['creators'] = creators
    if 'license' in metadata:
        # Assuming uri to name conversion, not optimal
        uri = metadata['license']
        name = uri.rstrip('/').split('/')[-1]
        datacite['rightsList'] = [{'rights': name, 'rightsURI': uri}]
    return datacite

def _extract_codemeta(zip_path, dest='codemeta.json'):
    """Copy any ``codemeta.json`` member of ``zip_path`` to ``dest``.

    Directory components inside the archive are ignored. Returns True when
    at least one matching member was written, False otherwise.
    """
    import shutil
    import zipfile

    found = False
    with zipfile.ZipFile(zip_path) as archive:
        for member in archive.namelist():
            if member.split('/')[-1] == 'codemeta.json':
                # Does not sensibly handle repos with multiple codemeta
                # files: each match overwrites the previous one.
                with archive.open(member) as src, open(dest, 'wb') as dst:
                    shutil.copyfileobj(src, dst)
                found = True
    return found


def match_codemeta():
    """Send codemeta.json metadata from attached zip archives to CaltechDATA.

    For every key in the ``github_records`` dataset collection: read the
    attachment listing, fetch the attachment, look inside a ``.zip``
    attachment for ``codemeta.json``, convert it with
    ``codemeta_to_datacite`` and pass the result to ``caltechdata_edit``
    using the token in the ``TINDTOK`` environment variable.

    Raises:
        subprocess.CalledProcessError: a ``dataset`` listing command failed.
        zipfile.BadZipFile: a ``.zip`` attachment is not a valid archive.
        KeyError: ``TINDTOK`` is not set when a codemeta file was found.
    """
    keys = subprocess.check_output(
        ["dataset", "-c", "github_records", "keys"],
        universal_newlines=True).splitlines()
    for k in keys:
        # The whole listing output is treated as a single attachment name.
        file_names = [subprocess.check_output(
            ["dataset", "attachments", k], universal_newlines=True)]
        # Presumably writes the record's attachments into the working
        # directory -- confirm against the dataset CLI. Argument list
        # instead of a shell string so the key is never shell-interpreted.
        subprocess.call(["dataset", "attached", k])
        codemeta = False
        for f in file_names:
            print(f)
            # Strip first: the command output may carry a trailing newline
            # that would otherwise defeat the extension test.
            name = f.rstrip()
            if name.split('.')[-1] == 'zip':
                if _extract_codemeta(name):
                    codemeta = True
            # NOTE(review): the flattened source does not show whether the
            # removal was inside the zip branch; here every fetched
            # attachment is removed -- confirm.
            if name:
                try:
                    os.remove(name)
                except OSError as err:
                    # Best-effort cleanup, as with the previous ``rm``.
                    print('Could not remove ' + name + ': ' + str(err))

        if codemeta:
            token = os.environ['TINDTOK']

            with open('codemeta.json', 'r') as infile:
                meta = json.load(infile)
            standardized = codemeta_to_datacite(meta)
            response = caltechdata_edit(token, k, standardized, {}, {}, False)
            print(response)


2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from setuptools import setup, find_packages
setup(
name = 'ames',
version ='0.0.2',
version ='0.0.3',
packages = find_packages(),
install_requires=[
'requests'
Expand Down

0 comments on commit bdced82

Please sign in to comment.