Skip to content

Commit

Permalink
Adding usage report
Browse files Browse the repository at this point in the history
Fixes #270
  • Loading branch information
cmungall committed Aug 17, 2024
1 parent 47eb456 commit 2970a03
Show file tree
Hide file tree
Showing 5 changed files with 316 additions and 0 deletions.
19 changes: 19 additions & 0 deletions src/ontology/cob.Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,25 @@ $(COB_ANNOTATIONS): $(TMPDIR)/cob-annotations.ttl
--annotation owl:versionInfo $(TODAY) \
--output $@

########################################
# -- USAGES --
########################################

# Sources whose COB-term usages are summarised. Each word becomes the stem of
# a reports/summary-cob-usages-<source>.tsv target, and the same stem is what
# the reports/cob-usages-%.tsv recipe passes to runoak as "<source>:".
# bioportal is currently switched off; to include it, use instead:
# USAGE_SOURCES = ontobee ubergraph bioportal
USAGE_SOURCES = ontobee ubergraph

# all_usages names a task, not a file: declare it phony so a stray file
# called "all_usages" can never make the goal look up to date.
.PHONY: all_usages
all_usages: $(patsubst %,reports/summary-cob-usages-%.tsv,$(USAGE_SOURCES))

# Per-class subclass counts, as printed on stdout by generate_usage_sparql.py.
# The output is staged in $@.tmp and renamed only when the script succeeds, so
# a failed or interrupted run cannot leave an empty or truncated report that
# Make would afterwards consider up to date. The staging file is removed on
# failure and the recipe still exits non-zero.
# NOTE(review): neither the script nor the cob.tsv it reads is declared as a
# prerequisite, so an existing report is never regenerated automatically.
reports/cob-direct-subclass-counts-ontobee.tsv:
	python3 ../scripts/generate_usage_sparql.py > $@.tmp && mv -f $@.tmp $@ || { rm -f $@.tmp; exit 1; }

# Keep the raw usage dumps on disk even when Make builds them only as an
# intermediate step towards reports/summary-cob-usages-%.tsv.
.PRECIOUS: reports/cob-usages-%.tsv

# Raw usage table for one source. The stem matched by % is handed to runoak
# as the input selector "<stem>:", and the identifiers to look up are taken
# from cob.tsv through runoak's ".idfile" argument.
reports/cob-usages-%.tsv:
	runoak -i $*: usages .idfile cob.tsv -o $@

# Tally how many rows of the raw usage table mention each identifier in the
# first column, then sort by descending count.
# Only first fields shaped like a CURIE (PREFIX:LOCALID) are counted. Without
# that filter, header rows ("used_id", "ID") and stray continuation lines
# (a lone double quote) end up in the summary as if they were identifiers.
# $$ is Make's escape for a literal $ handed to the shell/awk.
reports/summary-cob-usages-%.tsv: reports/cob-usages-%.tsv
	awk '$$1 ~ /^[A-Za-z][A-Za-z0-9_.-]*:[A-Za-z0-9_]+$$/ {count[$$1]++} END {for (key in count) print key "\t" count[key]}' $< | sort -k2 -nr > $@


########################################
# -- TESTING --
Expand Down
75 changes: 75 additions & 0 deletions src/ontology/reports/cob-direct-subclass-counts-ontobee.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
Class Label Subclass Count
http://purl.obolibrary.org/obo/PR_000000001 protein 36130
http://purl.obolibrary.org/obo/OBI_0000047 processed material entity 3935
http://purl.obolibrary.org/obo/GO_0005634 cell nucleus 2782
http://purl.obolibrary.org/obo/BFO_0000015 process 2721
http://purl.obolibrary.org/obo/BFO_0000040 material entity 670
http://purl.obolibrary.org/obo/GO_0032991 protein-containing macromolecular complex 540
http://purl.obolibrary.org/obo/IAO_0000030 information 472
http://purl.obolibrary.org/obo/IAO_0000027 data item 433
http://purl.obolibrary.org/obo/BFO_0000023 role 395
http://purl.obolibrary.org/obo/OBI_0000011 completely executed planned process 347
http://purl.obolibrary.org/obo/GO_0008150 biological process 293
http://purl.obolibrary.org/obo/IAO_0000310 document 211
http://purl.obolibrary.org/obo/OBI_0200000 data transformation 210
http://purl.obolibrary.org/obo/OBI_0000070 assay 209
http://purl.obolibrary.org/obo/OBI_0100026 organism 172
http://purl.obolibrary.org/obo/CL_0000000 cell 167
http://purl.obolibrary.org/obo/BFO_0000016 disposition 156
http://purl.obolibrary.org/obo/OBI_0000094 material processing 133
http://purl.obolibrary.org/obo/ENVO_01000813 geophysical entity 91
http://purl.obolibrary.org/obo/OBI_0000245 organization 87
http://purl.obolibrary.org/obo/GO_0003674 gene product or complex activity 85
http://purl.obolibrary.org/obo/BFO_0000034 function 83
http://purl.obolibrary.org/obo/IAO_0000033 directive information entity 79
http://purl.obolibrary.org/obo/IAO_0000104 plan specification 77
http://purl.obolibrary.org/obo/CL_0000003 native cell 72
http://purl.obolibrary.org/obo/IAO_0000005 objective specification 68
http://purl.obolibrary.org/obo/OGMS_0000014 phenotypic finding 66
http://purl.obolibrary.org/obo/ENVO_02500000 environmental process 61
http://purl.obolibrary.org/obo/BFO_0000141 immaterial entity 61
http://purl.obolibrary.org/obo/BFO_0000029 site 58
http://purl.obolibrary.org/obo/MOP_0000543 physico-chemical process 49
http://purl.obolibrary.org/obo/BFO_0000020 characteristic 49
http://purl.obolibrary.org/obo/CHEBI_33250 uncharged atom 49
http://purl.obolibrary.org/obo/OGMS_0000073 disease diagnosis 42
http://purl.obolibrary.org/obo/ENVO_01001110 ecosystem 37
http://purl.obolibrary.org/obo/BFO_0000017 realizable 36
http://purl.obolibrary.org/obo/PATO_0000125 mass 33
http://purl.obolibrary.org/obo/OBI_0000066 investigation 32
http://purl.obolibrary.org/obo/PCO_0000000 collection of organisms 18
http://purl.obolibrary.org/obo/OGMS_0000063 disease course 14
http://purl.obolibrary.org/obo/CARO_0001008 gross anatomical part 13
http://purl.obolibrary.org/obo/CHEBI_36342 subatomic particle 11
http://purl.obolibrary.org/obo/CHEBI_33696 nucleic acid polymer 11
http://purl.obolibrary.org/obo/COB_0000032 geographical location 10
http://purl.obolibrary.org/obo/OBI_0001909 conclusion based on data 7
http://purl.obolibrary.org/obo/CHEBI_33252 atomic nucleus 5
http://purl.obolibrary.org/obo/COB_0000082 planned process 5
http://purl.obolibrary.org/obo/CHEBI_24867 monoatomic ion 5
http://purl.obolibrary.org/obo/COB_0000013 molecular entity 4
http://purl.obolibrary.org/obo/COB_0000080 complex of molecular entities 4
http://purl.obolibrary.org/obo/SEPIO_0000048 agent role 3
http://purl.obolibrary.org/obo/PATO_0002193 charge 3
http://purl.obolibrary.org/obo/CARO_0010004 cellular organism 3
http://purl.obolibrary.org/obo/COB_0000011 atom 2
http://purl.obolibrary.org/obo/COB_0000020 subcellular structure 2
http://purl.obolibrary.org/obo/CL_0001034 cell in vitro 2
http://purl.obolibrary.org/obo/OBI_0000260 plan 2
http://purl.obolibrary.org/obo/CHEBI_10545 electron 1
http://purl.obolibrary.org/obo/COB_0000120 information representation 1
http://purl.obolibrary.org/obo/CHEBI_24636 proton 0
http://purl.obolibrary.org/obo/CHEBI_30222 neutron 0
http://purl.obolibrary.org/obo/COB_0000005 obsolete_elementary charge 0
http://purl.obolibrary.org/obo/COB_0000014 obsolete macromolecular entity 0
http://purl.obolibrary.org/obo/COB_0000056 immaterial anatomical entity 0
http://purl.obolibrary.org/obo/COB_0000073 gene product 0
http://purl.obolibrary.org/obo/COB_0000077 action specification 0
http://purl.obolibrary.org/obo/COB_0000083 failed planned process 0
http://purl.obolibrary.org/obo/COB_0000116 cellular membrane 0
http://purl.obolibrary.org/obo/COB_0000119 evaluant role 0
http://purl.obolibrary.org/obo/COB_0000121 measurement datum 0
http://purl.obolibrary.org/obo/COB_0000122 physical information carrier 0
http://purl.obolibrary.org/obo/COB_0001000 exposure of organism 0
http://purl.obolibrary.org/obo/COB_0001300 device 0
http://purl.obolibrary.org/obo/DRON_0000005 drug product 0
77 changes: 77 additions & 0 deletions src/ontology/reports/summary-cob-usages-ontobee.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
PR:000000001 36181
OBI:0000047 3944
GO:0005634 2955
BFO:0000015 2746
BFO:0000040 741
CL:0000000 688
GO:0032991 560
IAO:0000030 507
IAO:0000027 474
BFO:0000023 415
GO:0008150 411
OBI:0000011 359
OBI:0100026 244
OBI:0000070 226
IAO:0000310 224
OBI:0200000 221
BFO:0000016 164
OBI:0000094 150
OBI:0000245 111
GO:0003674 106
CHEBI:10545 106
CL:0000003 99
ENVO:01000813 96
BFO:0000034 95
IAO:0000104 89
IAO:0000033 83
IAO:0000005 77
OGMS:0000014 72
BFO:0000029 70
CHEBI:33250 67
ENVO:02500000 66
BFO:0000020 65
BFO:0000141 63
PATO:0000125 52
MOP:0000543 51
OGMS:0000073 48
OBI:0000066 48
CHEBI:36342 47
ENVO:01001110 45
BFO:0000017 40
CHEBI:33696 33
PCO:0000000 27
OGMS:0000063 23
CARO:0001008 21
CHEBI:24867 18
CHEBI:24636 14
COB:0000032 13
CHEBI:33252 13
OBI:0001909 12
PATO:0002193 8
COB:0000082 8
COB:0000013 8
SEPIO:0000048 7
OBI:0000260 7
COB:0000080 6
" 6
COB:0000077 5
COB:0000011 5
CL:0001034 5
CARO:0010004 5
COB:0000020 4
COB:0001300 3
COB:0000120 3
COB:0000073 2
CHEBI:30222 2
used_id 1
ID 1
DRON:0000005 1
COB:0001000 1
COB:0000122 1
COB:0000121 1
COB:0000119 1
COB:0000116 1
COB:0000083 1
COB:0000056 1
COB:0000014 1
COB:0000005 1
76 changes: 76 additions & 0 deletions src/ontology/reports/summary-cob-usages-ubergraph.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
PR:000000001 21803
BFO:0000017 4554
GO:0005634 2961
CL:0000000 930
CARO:0010004 779
CHEBI:10545 759
CHEBI:33252 754
GO:0008150 588
GO:0032991 485
BFO:0000040 450
COB:0001000 141
ENVO:01000813 140
OBI:0000070 129
BFO:0000034 129
BFO:0000015 121
OBI:0000011 84
OBI:0000047 82
OBI:0100026 69
IAO:0000030 67
ENVO:02500000 65
OBI:0000094 60
CHEBI:33250 58
BFO:0000020 58
IAO:0000027 53
OBI:0200000 49
GO:0003674 49
CHEBI:33696 49
BFO:0000023 49
ENVO:01001110 37
IAO:0000104 32
IAO:0000033 27
IAO:0000005 27
OBI:0000066 25
CHEBI:24867 23
OBI:0001909 20
OBI:0000245 20
IAO:0000310 20
BFO:0000016 19
PATO:0000125 18
CHEBI:36342 17
COB:0000013 15
OGMS:0000063 11
COB:0000011 11
PCO:0000000 10
PATO:0002193 10
OBI:0000260 10
COB:0000082 10
COB:0000120 9
BFO:0000141 9
BFO:0000029 9
COB:0001300 8
COB:0000121 8
COB:0000080 8
COB:0000077 8
CL:0000003 8
CHEBI:24636 8
SEPIO:0000048 6
OGMS:0000073 6
COB:0000020 6
CARO:0001008 6
COB:0000056 5
CL:0001034 5
OGMS:0000014 4
MOP:0000543 4
DRON:0000005 4
COB:0000122 4
COB:0000032 4
COB:0000073 3
CHEBI:30222 3
COB:0000119 2
used_id 1
ID 1
COB:0000116 1
COB:0000083 1
COB:0000014 1
COB:0000005 1
69 changes: 69 additions & 0 deletions src/scripts/generate_usage_sparql.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
import requests

# Module-level cache mapping each expanded class URI to its label. It is
# filled by read_uris_from_file() and consumed (and emptied) by the reporting
# code at the bottom of this script.
labels = {}


def expand(curie: str) -> str:
    """Turn a CURIE such as ``GO:0005634`` into an OBO PURL.

    Every ``:`` in ``curie`` is replaced by ``_`` and the result is appended
    to ``http://purl.obolibrary.org/obo/``.
    """
    return "http://purl.obolibrary.org/obo/" + curie.replace(":", "_")


def read_uris_from_file(filename):
    """Read a tab-separated ``ID<TAB>label`` file into ``(uri, label)`` pairs.

    Args:
        filename: Path of the TSV file. The first column holds a CURIE and
            the second column, when present, holds its label.

    Returns:
        A list of ``(uri, label)`` tuples in file order. As a side effect each
        pair is also stored in the module-level ``labels`` dict.

    Blank lines are skipped, and so is any row whose first column is exactly
    ``ID`` (the header). A row without a label column gets an empty label
    instead of raising ``IndexError``.
    """
    tuples = []
    with open(filename, "r", encoding="utf-8") as file:
        for line in file:
            line = line.strip()
            if not line:
                continue
            toks = line.split("\t")
            curie = toks[0].strip()
            # Compare the whole first column: a prefix test on the line would
            # also discard real CURIEs whose prefix starts with "ID"
            # (for example "IDO:0000001").
            if curie == "ID":
                continue
            label = toks[1] if len(toks) > 1 else ""
            uri = expand(curie)
            labels[uri] = label
            tuples.append((uri, label))
    return tuples

def generate_sparql_query(uris):
    """Build the SPARQL query that counts distinct subclasses per class.

    Args:
        uris: Iterable of class URIs; each one is wrapped in angle brackets
            and listed in the ``VALUES ?class`` clause.

    Returns:
        The query text. It selects ``?class`` and ``?subclassCount``, excludes
        a class being counted as its own subclass, groups by class and orders
        by descending count.
    """
    bracketed = ["<{}>".format(uri) for uri in uris]
    values_clause = "\n ".join(bracketed)
    query_lines = [
        "PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>",
        "PREFIX obo: <http://purl.obolibrary.org/obo/>",
        "SELECT ?class (COUNT(DISTINCT ?subclass) AS ?subclassCount)",
        "WHERE {",
        "VALUES ?class {",
        values_clause,
        "}",
        "?subclass rdfs:subClassOf ?class .",
        "FILTER (?subclass != ?class)",
        "}",
        "GROUP BY ?class",
        "ORDER BY DESC(?subclassCount)",
    ]
    return "\n".join(query_lines)

def execute_sparql_query(query, endpoint="https://sparql.hegroup.org/sparql", timeout=300):
    """POST a SPARQL query to an endpoint and return the decoded JSON result.

    Args:
        query: SPARQL query text.
        endpoint: URL of the SPARQL endpoint. Defaults to the endpoint this
            script has always used.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely, which would hang the
            build if the endpoint stops responding.

    Returns:
        The parsed JSON body of the response.

    Raises:
        requests.HTTPError: If the endpoint answers with a 4xx/5xx status.
            Raised before JSON decoding so the real failure is reported
            instead of a decode error on an HTML error page.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
    """
    headers = {
        "Accept": "application/sparql-results+json",
        "Content-Type": "application/x-www-form-urlencoded"
    }
    data = {
        "query": query,
        "format": "json"
    }
    response = requests.post(endpoint, headers=headers, data=data, timeout=timeout)
    response.raise_for_status()
    return response.json()

# Script body: load the class list, query the endpoint, print a TSV report.
pairs = read_uris_from_file('cob.tsv')
uris = [uri for uri, _ in pairs]
query = generate_sparql_query(uris)

results = execute_sparql_query(query)

# Report header, then one row per class returned by the endpoint.
print("Class\tLabel\tSubclass Count")
for binding in results['results']['bindings']:
    class_uri = binding['class']['value']
    # The query selects no ?label, so the label comes from the file-derived
    # ``labels`` cache rather than from the response.
    label = labels[class_uri]
    count = binding['subclassCount']['value']
    print(class_uri, label, count, sep="\t")
    # Drop reported classes so only the unreported ones remain below.
    del labels[class_uri]

# Whatever is left in ``labels`` was not returned by the query; report it
# with a count of 0.
for class_uri, label in labels.items():
    print(class_uri, label, 0, sep="\t")

0 comments on commit 2970a03

Please sign in to comment.