-
Notifications
You must be signed in to change notification settings - Fork 8
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Fixes #270
- Loading branch information
Showing
5 changed files
with
316 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
75 changes: 75 additions & 0 deletions
75
src/ontology/reports/cob-direct-subclass-counts-ontobee.tsv
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,75 @@ | ||
Class Label Subclass Count | ||
http://purl.obolibrary.org/obo/PR_000000001 protein 36130 | ||
http://purl.obolibrary.org/obo/OBI_0000047 processed material entity 3935 | ||
http://purl.obolibrary.org/obo/GO_0005634 cell nucleus 2782 | ||
http://purl.obolibrary.org/obo/BFO_0000015 process 2721 | ||
http://purl.obolibrary.org/obo/BFO_0000040 material entity 670 | ||
http://purl.obolibrary.org/obo/GO_0032991 protein-containing macromolecular complex 540 | ||
http://purl.obolibrary.org/obo/IAO_0000030 information 472 | ||
http://purl.obolibrary.org/obo/IAO_0000027 data item 433 | ||
http://purl.obolibrary.org/obo/BFO_0000023 role 395 | ||
http://purl.obolibrary.org/obo/OBI_0000011 completely executed planned process 347 | ||
http://purl.obolibrary.org/obo/GO_0008150 biological process 293 | ||
http://purl.obolibrary.org/obo/IAO_0000310 document 211 | ||
http://purl.obolibrary.org/obo/OBI_0200000 data transformation 210 | ||
http://purl.obolibrary.org/obo/OBI_0000070 assay 209 | ||
http://purl.obolibrary.org/obo/OBI_0100026 organism 172 | ||
http://purl.obolibrary.org/obo/CL_0000000 cell 167 | ||
http://purl.obolibrary.org/obo/BFO_0000016 disposition 156 | ||
http://purl.obolibrary.org/obo/OBI_0000094 material processing 133 | ||
http://purl.obolibrary.org/obo/ENVO_01000813 geophysical entity 91 | ||
http://purl.obolibrary.org/obo/OBI_0000245 organization 87 | ||
http://purl.obolibrary.org/obo/GO_0003674 gene product or complex activity 85 | ||
http://purl.obolibrary.org/obo/BFO_0000034 function 83 | ||
http://purl.obolibrary.org/obo/IAO_0000033 directive information entity 79 | ||
http://purl.obolibrary.org/obo/IAO_0000104 plan specification 77 | ||
http://purl.obolibrary.org/obo/CL_0000003 native cell 72 | ||
http://purl.obolibrary.org/obo/IAO_0000005 objective specification 68 | ||
http://purl.obolibrary.org/obo/OGMS_0000014 phenotypic finding 66 | ||
http://purl.obolibrary.org/obo/ENVO_02500000 environmental process 61 | ||
http://purl.obolibrary.org/obo/BFO_0000141 immaterial entity 61 | ||
http://purl.obolibrary.org/obo/BFO_0000029 site 58 | ||
http://purl.obolibrary.org/obo/MOP_0000543 physico-chemical process 49 | ||
http://purl.obolibrary.org/obo/BFO_0000020 characteristic 49 | ||
http://purl.obolibrary.org/obo/CHEBI_33250 uncharged atom 49 | ||
http://purl.obolibrary.org/obo/OGMS_0000073 disease diagnosis 42 | ||
http://purl.obolibrary.org/obo/ENVO_01001110 ecosystem 37 | ||
http://purl.obolibrary.org/obo/BFO_0000017 realizable 36 | ||
http://purl.obolibrary.org/obo/PATO_0000125 mass 33 | ||
http://purl.obolibrary.org/obo/OBI_0000066 investigation 32 | ||
http://purl.obolibrary.org/obo/PCO_0000000 collection of organisms 18 | ||
http://purl.obolibrary.org/obo/OGMS_0000063 disease course 14 | ||
http://purl.obolibrary.org/obo/CARO_0001008 gross anatomical part 13 | ||
http://purl.obolibrary.org/obo/CHEBI_36342 subatomic particle 11 | ||
http://purl.obolibrary.org/obo/CHEBI_33696 nucleic acid polymer 11 | ||
http://purl.obolibrary.org/obo/COB_0000032 geographical location 10 | ||
http://purl.obolibrary.org/obo/OBI_0001909 conclusion based on data 7 | ||
http://purl.obolibrary.org/obo/CHEBI_33252 atomic nucleus 5 | ||
http://purl.obolibrary.org/obo/COB_0000082 planned process 5 | ||
http://purl.obolibrary.org/obo/CHEBI_24867 monoatomic ion 5 | ||
http://purl.obolibrary.org/obo/COB_0000013 molecular entity 4 | ||
http://purl.obolibrary.org/obo/COB_0000080 complex of molecular entities 4 | ||
http://purl.obolibrary.org/obo/SEPIO_0000048 agent role 3 | ||
http://purl.obolibrary.org/obo/PATO_0002193 charge 3 | ||
http://purl.obolibrary.org/obo/CARO_0010004 cellular organism 3 | ||
http://purl.obolibrary.org/obo/COB_0000011 atom 2 | ||
http://purl.obolibrary.org/obo/COB_0000020 subcellular structure 2 | ||
http://purl.obolibrary.org/obo/CL_0001034 cell in vitro 2 | ||
http://purl.obolibrary.org/obo/OBI_0000260 plan 2 | ||
http://purl.obolibrary.org/obo/CHEBI_10545 electron 1 | ||
http://purl.obolibrary.org/obo/COB_0000120 information representation 1 | ||
http://purl.obolibrary.org/obo/CHEBI_24636 proton 0 | ||
http://purl.obolibrary.org/obo/CHEBI_30222 neutron 0 | ||
http://purl.obolibrary.org/obo/COB_0000005 obsolete_elementary charge 0 | ||
http://purl.obolibrary.org/obo/COB_0000014 obsolete macromolecular entity 0 | ||
http://purl.obolibrary.org/obo/COB_0000056 immaterial anatomical entity 0 | ||
http://purl.obolibrary.org/obo/COB_0000073 gene product 0 | ||
http://purl.obolibrary.org/obo/COB_0000077 action specification 0 | ||
http://purl.obolibrary.org/obo/COB_0000083 failed planned process 0 | ||
http://purl.obolibrary.org/obo/COB_0000116 cellular membrane 0 | ||
http://purl.obolibrary.org/obo/COB_0000119 evaluant role 0 | ||
http://purl.obolibrary.org/obo/COB_0000121 measurement datum 0 | ||
http://purl.obolibrary.org/obo/COB_0000122 physical information carrier 0 | ||
http://purl.obolibrary.org/obo/COB_0001000 exposure of organism 0 | ||
http://purl.obolibrary.org/obo/COB_0001300 device 0 | ||
http://purl.obolibrary.org/obo/DRON_0000005 drug product 0 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,77 @@ | ||
PR:000000001 36181 | ||
OBI:0000047 3944 | ||
GO:0005634 2955 | ||
BFO:0000015 2746 | ||
BFO:0000040 741 | ||
CL:0000000 688 | ||
GO:0032991 560 | ||
IAO:0000030 507 | ||
IAO:0000027 474 | ||
BFO:0000023 415 | ||
GO:0008150 411 | ||
OBI:0000011 359 | ||
OBI:0100026 244 | ||
OBI:0000070 226 | ||
IAO:0000310 224 | ||
OBI:0200000 221 | ||
BFO:0000016 164 | ||
OBI:0000094 150 | ||
OBI:0000245 111 | ||
GO:0003674 106 | ||
CHEBI:10545 106 | ||
CL:0000003 99 | ||
ENVO:01000813 96 | ||
BFO:0000034 95 | ||
IAO:0000104 89 | ||
IAO:0000033 83 | ||
IAO:0000005 77 | ||
OGMS:0000014 72 | ||
BFO:0000029 70 | ||
CHEBI:33250 67 | ||
ENVO:02500000 66 | ||
BFO:0000020 65 | ||
BFO:0000141 63 | ||
PATO:0000125 52 | ||
MOP:0000543 51 | ||
OGMS:0000073 48 | ||
OBI:0000066 48 | ||
CHEBI:36342 47 | ||
ENVO:01001110 45 | ||
BFO:0000017 40 | ||
CHEBI:33696 33 | ||
PCO:0000000 27 | ||
OGMS:0000063 23 | ||
CARO:0001008 21 | ||
CHEBI:24867 18 | ||
CHEBI:24636 14 | ||
COB:0000032 13 | ||
CHEBI:33252 13 | ||
OBI:0001909 12 | ||
PATO:0002193 8 | ||
COB:0000082 8 | ||
COB:0000013 8 | ||
SEPIO:0000048 7 | ||
OBI:0000260 7 | ||
COB:0000080 6 | ||
" 6 | ||
COB:0000077 5 | ||
COB:0000011 5 | ||
CL:0001034 5 | ||
CARO:0010004 5 | ||
COB:0000020 4 | ||
COB:0001300 3 | ||
COB:0000120 3 | ||
COB:0000073 2 | ||
CHEBI:30222 2 | ||
used_id 1 | ||
ID 1 | ||
DRON:0000005 1 | ||
COB:0001000 1 | ||
COB:0000122 1 | ||
COB:0000121 1 | ||
COB:0000119 1 | ||
COB:0000116 1 | ||
COB:0000083 1 | ||
COB:0000056 1 | ||
COB:0000014 1 | ||
COB:0000005 1 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
PR:000000001 21803 | ||
BFO:0000017 4554 | ||
GO:0005634 2961 | ||
CL:0000000 930 | ||
CARO:0010004 779 | ||
CHEBI:10545 759 | ||
CHEBI:33252 754 | ||
GO:0008150 588 | ||
GO:0032991 485 | ||
BFO:0000040 450 | ||
COB:0001000 141 | ||
ENVO:01000813 140 | ||
OBI:0000070 129 | ||
BFO:0000034 129 | ||
BFO:0000015 121 | ||
OBI:0000011 84 | ||
OBI:0000047 82 | ||
OBI:0100026 69 | ||
IAO:0000030 67 | ||
ENVO:02500000 65 | ||
OBI:0000094 60 | ||
CHEBI:33250 58 | ||
BFO:0000020 58 | ||
IAO:0000027 53 | ||
OBI:0200000 49 | ||
GO:0003674 49 | ||
CHEBI:33696 49 | ||
BFO:0000023 49 | ||
ENVO:01001110 37 | ||
IAO:0000104 32 | ||
IAO:0000033 27 | ||
IAO:0000005 27 | ||
OBI:0000066 25 | ||
CHEBI:24867 23 | ||
OBI:0001909 20 | ||
OBI:0000245 20 | ||
IAO:0000310 20 | ||
BFO:0000016 19 | ||
PATO:0000125 18 | ||
CHEBI:36342 17 | ||
COB:0000013 15 | ||
OGMS:0000063 11 | ||
COB:0000011 11 | ||
PCO:0000000 10 | ||
PATO:0002193 10 | ||
OBI:0000260 10 | ||
COB:0000082 10 | ||
COB:0000120 9 | ||
BFO:0000141 9 | ||
BFO:0000029 9 | ||
COB:0001300 8 | ||
COB:0000121 8 | ||
COB:0000080 8 | ||
COB:0000077 8 | ||
CL:0000003 8 | ||
CHEBI:24636 8 | ||
SEPIO:0000048 6 | ||
OGMS:0000073 6 | ||
COB:0000020 6 | ||
CARO:0001008 6 | ||
COB:0000056 5 | ||
CL:0001034 5 | ||
OGMS:0000014 4 | ||
MOP:0000543 4 | ||
DRON:0000005 4 | ||
COB:0000122 4 | ||
COB:0000032 4 | ||
COB:0000073 3 | ||
CHEBI:30222 3 | ||
COB:0000119 2 | ||
used_id 1 | ||
ID 1 | ||
COB:0000116 1 | ||
COB:0000083 1 | ||
COB:0000014 1 | ||
COB:0000005 1 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,69 @@ | ||
import requests | ||
|
||
# Module-level map from class URI to its label. It is filled in by
# read_uris_from_file() and consumed (entries removed) by the reporting loop
# at the bottom of the script.
labels = {}
|
||
def expand(curie: str) -> str:
    """Expand an OBO CURIE such as ``GO:0005634`` into its PURL.

    Only the first colon, the one separating the prefix from the local
    identifier, is turned into an underscore; any later colon belongs to the
    local identifier and is left alone.

    Args:
        curie: A ``PREFIX:LOCAL_ID`` string, or an already expanded
            ``http://`` / ``https://`` IRI.

    Returns:
        The IRI under ``http://purl.obolibrary.org/obo/``. An input that is
        already an absolute http(s) IRI is returned unchanged instead of
        being prefixed a second time.
    """
    if curie.startswith(("http://", "https://")):
        return curie
    return f"http://purl.obolibrary.org/obo/{curie.replace(':', '_', 1)}"
|
||
def read_uris_from_file(filename):
    """Read ``(uri, label)`` pairs from a tab-separated file.

    The first column of each row is a CURIE, which is expanded with
    ``expand()``; the second column is the label. Every pair is also stored
    in the module-level ``labels`` dict, keyed by URI.

    Rows that are skipped:
      * blank rows (empty first column);
      * header rows, i.e. rows whose first column is exactly ``ID``.

    A row without a second column gets an empty label rather than raising.

    Args:
        filename: Path of the TSV file to read.

    Returns:
        A list of ``(uri, label)`` tuples in file order.
    """
    tuples = []
    with open(filename, 'r', encoding='utf-8') as file:
        for line in file:
            # Remove only the line terminator: a full strip() would also eat
            # a trailing tab and make an empty label column disappear.
            toks = line.rstrip("\r\n").split("\t")
            curie = toks[0].strip()
            # Compare the whole cell so that CURIEs whose prefix merely
            # starts with "ID" (e.g. "IDO:...") are not mistaken for a header.
            if not curie or curie == "ID":
                continue
            label = toks[1].strip() if len(toks) > 1 else ""
            uri = expand(curie)
            labels[uri] = label
            tuples.append((uri, label))
    return tuples
|
||
def generate_sparql_query(uris):
    """Build a SPARQL query counting the subclasses of each given class.

    The query lists every URI in a ``VALUES`` clause, matches
    ``?subclass rdfs:subClassOf ?class`` triples (excluding a class being its
    own subclass), and returns one row per class with the number of distinct
    subclasses, ordered from the highest count to the lowest. Classes with no
    matching triple do not appear in the result at all.

    Args:
        uris: Iterable of absolute class IRIs, without angle brackets.

    Returns:
        The query text as a string.

    Raises:
        ValueError: If a URI contains a character that may not appear inside
            ``<...>`` in SPARQL. A single bad value would otherwise make the
            whole query unparseable.
    """
    # Characters excluded by the SPARQL IRIREF production, in addition to
    # everything in the range U+0000..U+0020 (checked separately below).
    forbidden = set('<>"{}|^`\\')
    iri_refs = []
    for uri in uris:
        if any(ch in forbidden or ch <= " " for ch in uri):
            raise ValueError(f"not a valid IRI for a SPARQL query: {uri!r}")
        iri_refs.append(f"<{uri}>")
    values_clause = "\n ".join(iri_refs)
    return f"""PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX obo: <http://purl.obolibrary.org/obo/>
SELECT ?class (COUNT(DISTINCT ?subclass) AS ?subclassCount)
WHERE {{
VALUES ?class {{
{values_clause}
}}
?subclass rdfs:subClassOf ?class .
FILTER (?subclass != ?class)
}}
GROUP BY ?class
ORDER BY DESC(?subclassCount)"""
|
||
def execute_sparql_query(query, endpoint="https://sparql.hegroup.org/sparql", timeout=300):
    """POST a SPARQL query to an endpoint and return the decoded JSON result.

    Args:
        query: The SPARQL query text.
        endpoint: URL of the SPARQL endpoint. Defaults to the endpoint the
            script has always used.
        timeout: Seconds to wait for the server before giving up, passed
            straight to ``requests.post``. Without a timeout the call can
            block forever on an unresponsive server.

    Returns:
        The response body parsed as JSON.

    Raises:
        requests.HTTPError: If the endpoint answers with a 4xx/5xx status.
            Checking the status first reports the real failure instead of a
            JSON decoding error on an error page.
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    headers = {
        "Accept": "application/sparql-results+json",
        "Content-Type": "application/x-www-form-urlencoded"
    }
    data = {
        "query": query,
        "format": "json"
    }
    response = requests.post(endpoint, headers=headers, data=data, timeout=timeout)
    response.raise_for_status()
    return response.json()
|
||
# Usage: load the classes listed in cob.tsv and ask the endpoint how many
# subclasses each of them has.
pairs = read_uris_from_file('cob.tsv')
uris = [uri for uri, _label in pairs]
query = generate_sparql_query(uris)

results = execute_sparql_query(query)

# Emit a TSV report: first the classes the endpoint returned, in the order it
# returned them, then every class it did not mention, with a count of 0.
print("Class\tLabel\tSubclass Count")
for result in results['results']['bindings']:
    class_uri = result['class']['value']
    # Taking the entry out of `labels` leaves behind only the classes that
    # never showed up in the query result.
    label = labels.pop(class_uri)
    count = result['subclassCount']['value']
    print(class_uri, label, count, sep="\t")
for class_uri, label in labels.items():
    print(class_uri, label, "0", sep="\t")