-
Notifications
You must be signed in to change notification settings - Fork 4
Breed4Food: example SPARQL queries
Namespace prefixes
- some boilerplate (e.g. check prefix.cc lookup service)
# General-purpose vocabularies (RDF/RDFS/XSD/OWL, Dublin Core, FOAF, SKOS).
PREFIX rdf:     <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs:    <http://www.w3.org/2000/01/rdf-schema#>
PREFIX xsd:     <http://www.w3.org/2001/XMLSchema#>
PREFIX owl:     <http://www.w3.org/2002/07/owl#>
PREFIX dc:      <http://purl.org/dc/elements/1.1/>
PREFIX dcterms: <http://purl.org/dc/terms/>
PREFIX foaf:    <http://xmlns.com/foaf/0.1/>
PREFIX skos:    <http://www.w3.org/2004/02/skos/core#>
- domain-specific
# Ontologies and identifier namespaces (OBO, SIO, pig QTLdb, PubMed, FALDO).
PREFIX obo:         <http://purl.obolibrary.org/obo/>
PREFIX sio:         <http://semanticscience.org/resource/>
PREFIX qtldb:       <http://identifiers.org/pigQTLdb/>
PREFIX pubmed:      <http://identifiers.org/pubmed/>
PREFIX faldo:       <http://biohackathon.org/resource/faldo#>
# Namespaces under purl.uniprot.org.
PREFIX prosite:     <http://purl.uniprot.org/prosite/>
PREFIX prints:      <http://purl.uniprot.org/prints/>
PREFIX pirsf:       <http://purl.uniprot.org/pirsf/>
PREFIX superfamily: <http://purl.uniprot.org/supfam/>
PREFIX tigrfam:     <http://purl.uniprot.org/tigrfams/>
PREFIX pfam:        <http://purl.uniprot.org/pfam/>
PREFIX smart:       <http://purl.uniprot.org/smart/>
# Ensembl RDF resources and terms (rdf.ebi.ac.uk), plus taxonomy identifiers.
PREFIX ensembl:     <http://rdf.ebi.ac.uk/resource/ensembl/>
PREFIX transcript:  <http://rdf.ebi.ac.uk/resource/ensembl.transcript/>
PREFIX protein:     <http://rdf.ebi.ac.uk/resource/ensembl.protein/>
PREFIX exon:        <http://rdf.ebi.ac.uk/resource/ensembl.exon/>
PREFIX term:        <http://rdf.ebi.ac.uk/terms/ensembl/>
PREFIX taxon:       <http://identifiers.org/taxonomy/>
1. Count all QTLs in pig QTLdb.
# Count every resource typed as a QTL (obo:SO_0000771) in the pig QTLdb graph.
# The projection is parenthesised because the SPARQL 1.1 grammar requires
# "(expr AS ?var)"; the bare "expr AS ?var" form is a non-standard extension.
PREFIX obo: <http://purl.obolibrary.org/obo/>

SELECT (COUNT(*) AS ?n)
FROM <http://www.animalgenome.org/QTLdb/pig>
WHERE {
  ?qtl a obo:SO_0000771 .
}
n |
---|
15905 |
2. Count QTLs associated with the trait of interest only (i.e. nipple quantity/teat number via VT/CMO ontologies).
# Count distinct QTLs linked via obo:RO_0002610 to any of the four VT/CMO
# terms used here for the trait of interest (nipple quantity / teat number).
# DISTINCT matters: a QTL linked to several of the listed terms yields one
# solution per term.
PREFIX obo: <http://purl.obolibrary.org/obo/>

SELECT (COUNT(DISTINCT ?qtl) AS ?n)
FROM <http://www.animalgenome.org/QTLdb/pig>
WHERE {
  ?qtl a obo:SO_0000771 ;
       obo:RO_0002610 ?trait .
  FILTER (?trait IN (obo:VT_1000206, obo:CMO_0000445, obo:CMO_0000472, obo:CMO_0000473))
}
n |
---|
258 |
3. Count only those QTLs associated with the trait which overlap with protein-coding genes.
# Count distinct trait-associated QTLs that have at least one obo:SO_overlaps
# link to a gene. ?gene is only required to exist; it is not projected.
PREFIX obo: <http://purl.obolibrary.org/obo/>

SELECT (COUNT(DISTINCT ?qtl) AS ?n)
FROM <http://www.animalgenome.org/QTLdb/pig>
WHERE {
  ?qtl a obo:SO_0000771 ;
       obo:RO_0002610 ?trait ;
       obo:SO_overlaps ?gene .
  FILTER (?trait IN (obo:VT_1000206, obo:CMO_0000445, obo:CMO_0000472, obo:CMO_0000473))
}
n |
---|
173 |
4. Count only those QTLs associated with the trait which do NOT overlap with protein-coding genes.
# Count distinct trait-associated QTLs that have NO obo:SO_overlaps link.
# MINUS removes every solution whose ?qtl also matches the overlap pattern.
PREFIX obo: <http://purl.obolibrary.org/obo/>

SELECT (COUNT(DISTINCT ?qtl) AS ?n)
FROM <http://www.animalgenome.org/QTLdb/pig>
WHERE {
  ?qtl a obo:SO_0000771 ;
       obo:RO_0002610 ?trait .
  MINUS { ?qtl obo:SO_overlaps ?gene }
  FILTER (?trait IN (obo:VT_1000206, obo:CMO_0000445, obo:CMO_0000472, obo:CMO_0000473))
}
n |
---|
85 |
5. Count QTLs across pig chromosomes (regardless of the trait of interest).
# Count distinct QTLs per chromosome, most populated chromosome first.
# The chromosome name is derived from the label of the resource the QTL is
# obo:SO_part_of.
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX obo:  <http://purl.obolibrary.org/obo/>

SELECT ?chr (COUNT(DISTINCT ?qtl) AS ?n)
FROM <http://www.animalgenome.org/QTLdb/pig>
WHERE {
  ?qtl a obo:SO_0000771 ;
       obo:SO_part_of/rdfs:label ?chr_lb .
  # Greedy '.+\.' strips everything up to and including the last '.' in the label.
  BIND (REPLACE(?chr_lb, '.+\\.', '') AS ?chr)
}
GROUP BY ?chr
ORDER BY DESC(?n)
chr | n |
---|---|
1 | 2072 |
7 | 1732 |
2 | 1722 |
6 | 1397 |
4 | 1181 |
14 | 938 |
8 | 870 |
13 | 761 |
12 | 672 |
15 | 651 |
9 | 646 |
3 | 603 |
5 | 581 |
16 | 495 |
X | 406 |
10 | 317 |
17 | 288 |
18 | 287 |
11 | 285 |
Y | 1 |
6. List the top 3 chromosomes with the most QTLs associated with the trait.
# List the three chromosomes carrying the most distinct trait-associated QTLs.
# The chromosome name is derived from the label of the resource the QTL is
# obo:SO_part_of.
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX obo:  <http://purl.obolibrary.org/obo/>

SELECT ?chr (COUNT(DISTINCT ?qtl) AS ?n)
FROM <http://www.animalgenome.org/QTLdb/pig>
WHERE {
  ?qtl a obo:SO_0000771 ;
       obo:SO_part_of/rdfs:label ?chr_lb ;
       obo:RO_0002610 ?trait .
  # Greedy '.+\.' strips everything up to and including the last '.' in the label.
  BIND (REPLACE(?chr_lb, '.+\\.', '') AS ?chr)
  FILTER (?trait IN (obo:VT_1000206, obo:CMO_0000445, obo:CMO_0000472, obo:CMO_0000473))
}
GROUP BY ?chr
ORDER BY DESC(?n)
LIMIT 3
chr | n |
---|---|
7 | 54 |
8 | 49 |
1 | 22 |
7. Count articles/PubMed IDs describing QTLs associated with the trait.
# Count the distinct references (dcterms:isReferencedBy) attached to
# trait-associated QTLs.
PREFIX obo:     <http://purl.obolibrary.org/obo/>
PREFIX dcterms: <http://purl.org/dc/terms/>

SELECT (COUNT(DISTINCT ?pmid) AS ?n)
FROM <http://www.animalgenome.org/QTLdb/pig>
WHERE {
  ?qtl a obo:SO_0000771 ;
       obo:RO_0002610 ?trait ;
       dcterms:isReferencedBy ?pmid .
  FILTER (?trait IN (obo:VT_1000206, obo:CMO_0000445, obo:CMO_0000472, obo:CMO_0000473))
}
n |
---|
20 |
8. List the top 3 articles describing the most QTLs associated with the trait.
# List the three references that describe the most trait-associated QTLs.
# Each reference is rendered as a Markdown link "[label](IRI)".
#
# NOTE: each QTL is counted once per reference (COUNT(DISTINCT ?qtl)). A plain
# COUNT(?qtl) counts one row per matching (QTL, trait term) pair, so figures
# produced with that form can be higher than the number of QTLs.
PREFIX rdfs:    <http://www.w3.org/2000/01/rdf-schema#>
PREFIX obo:     <http://purl.obolibrary.org/obo/>
PREFIX dcterms: <http://purl.org/dc/terms/>

SELECT ?pubmed (COUNT(DISTINCT ?qtl) AS ?n)
FROM <http://www.animalgenome.org/QTLdb/pig>
WHERE {
  ?qtl a obo:SO_0000771 ;
       obo:RO_0002610 ?trait ;
       dcterms:isReferencedBy ?pmid .
  ?pmid rdfs:label ?lb .
  # STR() turns the reference IRI into a string; CONCAT is only defined for
  # string literals in SPARQL 1.1.
  BIND (CONCAT('[', ?lb, '](', STR(?pmid), ')') AS ?pubmed)
  FILTER (?trait IN (obo:VT_1000206, obo:CMO_0000445, obo:CMO_0000472, obo:CMO_0000473))
}
GROUP BY ?pubmed
ORDER BY DESC(?n)
LIMIT 3
pubmed | n |
---|---|
PMID:26830357 | 130 |
PMID:24981054 | 78 |
PMID:26202474 | 60 |
9. List pig breeds sorted by the number of trait-associated QTLs.
# List breeds ordered by the number of distinct trait-associated QTLs.
# The QTL-to-breed link (obo:RO_0002162) comes from the pig QTLdb graph; the
# breed label comes from the LBO graph.
#
# NOTE: each QTL is counted once per breed name (COUNT(DISTINCT ?qtl)). A plain
# COUNT(?qtl) counts one row per matching (QTL, trait term) pair and can
# therefore exceed the total number of trait-associated QTLs.
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX obo:  <http://purl.obolibrary.org/obo/>

SELECT ?breed_name (COUNT(DISTINCT ?qtl) AS ?n)
WHERE {
  GRAPH <http://www.animalgenome.org/QTLdb/pig> {
    ?qtl a obo:SO_0000771 ;
         obo:RO_0002610 ?trait ;
         obo:RO_0002162 ?breed .
    FILTER (?trait IN (obo:VT_1000206, obo:CMO_0000445, obo:CMO_0000472, obo:CMO_0000473))
  }
  GRAPH <http://purl.bioontology.org/ontology/LBO> {
    ?breed rdfs:label ?breed_label .
    # Bind the plain string form under a new name: SPARQL 1.1 does not allow a
    # projection to re-bind a variable that is already in scope.
    BIND (STR(?breed_label) AS ?breed_name)
  }
}
GROUP BY ?breed_name
ORDER BY DESC(?n)
breed_name | n |
---|---|
Large White | 310 |
Duroc | 116 |
Meishan | 66 |
Erhualian | 56 |
Landrace | 48 |
Pietrain | 34 |
Yorkshire | 4 |
10. Count unique genes (from Ensembl) in all QTLs associated with the trait.
# Count the distinct genes overlapped (obo:SO_overlaps) by any
# trait-associated QTL.
PREFIX obo: <http://purl.obolibrary.org/obo/>

SELECT (COUNT(DISTINCT ?gene) AS ?n)
FROM <http://www.animalgenome.org/QTLdb/pig>
WHERE {
  ?qtl a obo:SO_0000771 ;
       obo:SO_overlaps ?gene ;
       obo:RO_0002610 ?trait .
  FILTER (?trait IN (obo:VT_1000206, obo:CMO_0000445, obo:CMO_0000472, obo:CMO_0000473))
}
n |
---|
10165 |
11. Count unique protein accessions (from UniProt) referenced in Ensembl.
# Count the distinct UniProt accession values referenced in the Ensembl pig
# graph. The accession is the sio:SIO_000300 value of the node reached via
# sio:SIO_000671 from each resource typed <http://identifiers.org/uniprot>.
PREFIX sio: <http://semanticscience.org/resource/>

SELECT (COUNT(DISTINCT ?uniprot_acc) AS ?n)
FROM <http://www.ensembl.org/pig>
WHERE {
  ?s a <http://identifiers.org/uniprot> ;
     sio:SIO_000671 [ sio:SIO_000300 ?uniprot_acc ] .
  # Keep only values consisting solely of uppercase letters and digits.
  FILTER REGEX(?uniprot_acc, '^[A-Z0-9]+$')
}
n |
---|
25202 |
12. Count unique protein accessions (from UniProt) referenced via Ensembl and pig QTLdb for the trait of interest.
# Count the distinct UniProt cross-references of proteins encoded by genes
# that overlap trait-associated QTLs. ?gene joins the two graphs:
# gene -> transcript -> protein -> xref in Ensembl, QTL -> gene in pig QTLdb.
PREFIX obo:  <http://purl.obolibrary.org/obo/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>

SELECT (COUNT(DISTINCT ?xref) AS ?n)
WHERE {
  GRAPH <http://www.ensembl.org/pig> {
    ?transcript obo:SO_transcribed_from ?gene ;
                obo:SO_translates_to ?protein .
    ?protein rdfs:seeAlso ?xref .
    # STR() is needed because REGEX is only defined for string literals in
    # SPARQL 1.1; the pattern itself is unchanged.
    FILTER REGEX(STR(?xref), 'http://identifiers.org/uniprot')
  }
  GRAPH <http://www.animalgenome.org/QTLdb/pig> {
    ?qtl a obo:SO_0000771 ;
         obo:SO_overlaps ?gene ;
         obo:RO_0002610 ?trait .
    FILTER (?trait IN (obo:VT_1000206, obo:CMO_0000445, obo:CMO_0000472, obo:CMO_0000473))
  }
}
n |
---|
11667 |
13. List the top 3 trait-associated QTLs that overlap the most genes.
# List the three trait-associated QTLs that overlap the most genes.
# Each QTL is rendered as a Markdown link "[label](source)".
#
# NOTE: genes are counted once per QTL (COUNT(DISTINCT ?gene)). A plain
# COUNT(?gene) counts one row per (gene, trait term, chromosome label)
# combination, so figures produced with that form can be higher.
PREFIX rdfs:    <http://www.w3.org/2000/01/rdf-schema#>
PREFIX obo:     <http://purl.obolibrary.org/obo/>
PREFIX dcterms: <http://purl.org/dc/terms/>

SELECT ?qtldb (COUNT(DISTINCT ?gene) AS ?n)
FROM <http://www.animalgenome.org/QTLdb/pig>
WHERE {
  ?qtl a obo:SO_0000771 ;
       rdfs:label ?qtl_id ;
       dcterms:source ?db ;
       obo:SO_overlaps ?gene ;
       # ?chr is not projected; this pattern only requires that the QTL is
       # part of a labelled resource.
       obo:SO_part_of/rdfs:label ?chr ;
       obo:RO_0002610 ?trait .
  # STR() makes ?db usable in CONCAT whether it is an IRI or a literal.
  BIND (CONCAT('[', ?qtl_id, '](', STR(?db), ')') AS ?qtldb)
  FILTER (?trait IN (obo:VT_1000206, obo:CMO_0000445, obo:CMO_0000472, obo:CMO_0000473))
}
GROUP BY ?qtldb
ORDER BY DESC(?n)
LIMIT 3
qtldb | n |
---|---|
QTL:5226 | 3034 |
QTL:5223 | 2546 |
QTL:5257 | 2340 |
14. List genes in QTLs associated with the trait.
# List the distinct (QTL, gene symbol) pairs for trait-associated QTLs.
# The QTL-to-gene overlap comes from pig QTLdb; the gene label comes from the
# Ensembl pig graph. DISTINCT is a modifier of the whole result row, not a
# function applied to one column.
PREFIX rdfs:    <http://www.w3.org/2000/01/rdf-schema#>
PREFIX dcterms: <http://purl.org/dc/terms/>
PREFIX obo:     <http://purl.obolibrary.org/obo/>

SELECT DISTINCT
  ?qtldb
  (STR(?gene_label) AS ?gene_symbol)
WHERE {
  GRAPH <http://www.animalgenome.org/QTLdb/pig> {
    ?qtl a obo:SO_0000771 ;
         dcterms:source ?db ;
         rdfs:label ?qtl_id ;
         obo:SO_overlaps ?gene ;
         obo:RO_0002610 ?trait .
    # Markdown link "[label](source)"; STR() makes ?db usable in CONCAT
    # whether it is an IRI or a literal.
    BIND (CONCAT('[', ?qtl_id, '](', STR(?db), ')') AS ?qtldb)
    FILTER (?trait IN (obo:VT_1000206, obo:CMO_0000445, obo:CMO_0000472, obo:CMO_0000473))
  }
  GRAPH <http://www.ensembl.org/pig> {
    # Bound under a separate name so the projection does not re-bind a
    # variable that is already in scope.
    ?gene rdfs:label ?gene_label .
  }
}
ORDER BY ?qtldb
qtldb | gene_symbol |
---|---|
QTL:1100 | PRMT9 |
QTL:1100 | DKK2 |
QTL:1100 | LNX1 |
... |
(12856 rows in total)
15. List only those trait-associated QTLs whose overlapping pig genes have human orthologs referenced in OMIM, including the orthologs and the chromosomal locations of both genes, each linked to the Ensembl genome browser.
# For trait-associated QTLs, list the overlapping pig genes whose human
# orthologs are linked (obo:RO_0002331) to an OMIM record, together with both
# gene locations, the OMIM identifier and the QTL's reference.
# Joins four graphs: pig QTLdb -> Ensembl pig -> Ensembl human -> Bio2RDF OMIM.
# Link-like columns are rendered as Markdown "[text](URL)".
#
# Changes for SPARQL 1.1 conformance: projections are parenthesised, raw
# labels are bound under their own names (a projection may not re-bind a
# variable already in scope), IRIs are wrapped in STR() before CONCAT, and
# UCASE replaces the Virtuoso-only bif:upper.
PREFIX rdfs:    <http://www.w3.org/2000/01/rdf-schema#>
PREFIX dcterms: <http://purl.org/dc/terms/>
PREFIX obo:     <http://purl.obolibrary.org/obo/>
PREFIX sio:     <http://semanticscience.org/resource/>
PREFIX faldo:   <http://biohackathon.org/resource/faldo#>

SELECT DISTINCT
  ?qtldb
  (STR(?gene_symbol) AS ?pig_gene)
  # 'chr.+ ' removes the text from "chr" through the last space of the
  # location label, leaving the part used as the browser's r= parameter.
  (CONCAT('[', ?pig_loc_lb, '](http://www.ensembl.org/Sus_scrofa/Location/View?r=', REPLACE(?pig_loc_lb, 'chr.+ ', ''), ')') AS ?gene_loc)
  (STR(?ortholog_lb) AS ?human_ortholog)
  (CONCAT('[', ?human_loc_lb, '](http://www.ensembl.org/Homo_sapiens/Location/View?r=', REPLACE(?human_loc_lb, 'chr.+ ', ''), ')') AS ?ortholog_loc)
  (CONCAT('[', UCASE(STR(?id)), '](', STR(?bio2rdf_omim), ')') AS ?omim)
  (CONCAT('[', ?lb, '](', STR(?pmid), ')') AS ?pubmed)
WHERE {
  GRAPH <http://www.animalgenome.org/QTLdb/pig> {
    ?qtl a obo:SO_0000771 ;
         dcterms:source ?db ;
         rdfs:label ?qtl_id ;
         dcterms:isReferencedBy ?pmid ;
         # ?chr is not projected; the pattern only requires a SO_part_of link.
         obo:SO_part_of ?chr ;
         obo:SO_overlaps ?gene ;
         obo:RO_0002610 ?trait .
    ?pmid rdfs:label ?lb .
    FILTER (?trait IN (obo:VT_1000206, obo:CMO_0000445, obo:CMO_0000472, obo:CMO_0000473))
    BIND (CONCAT('[', ?qtl_id, '](', STR(?db), ')') AS ?qtldb)
  }
  GRAPH <http://www.ensembl.org/pig> {
    ?gene rdfs:label ?gene_symbol ;
          sio:SIO_000558 ?ortholog ;
          faldo:location/rdfs:label ?pig_loc_lb .
  }
  GRAPH <http://www.ensembl.org/human> {
    ?ortholog obo:RO_0002331 ?bio2rdf_omim ;
              rdfs:label ?ortholog_lb ;
              faldo:location/rdfs:label ?human_loc_lb .
  }
  GRAPH <http://bio2rdf.org/omim_resource:bio2rdf.dataset.omim.R4> {
    ?bio2rdf_omim dcterms:identifier ?id .
  }
}
ORDER BY ?qtldb
qtldb | pig_gene | gene_loc | human_ortholog | ortholog_loc | omim | pubmed |
---|---|---|---|---|---|---|
QTL:17547 | HDAC1 | chromosome 6:82882051-82908171:-1 | HDAC1 [omim:601241] | chromosome 1:32292086-32333635:1 | OMIM:601241 | PMID:21108822 |
QTL:17547 | HEYL | chromosome 6:88530827-88547931:-1 | HEYL [omim:609034] | chromosome 1:39624153-39639945:-1 | OMIM:609034 | PMID:21108822 |
QTL:17547 | B4GALT6 | chromosome 6:108153077-108219519:-1 | B4GALT6 [omim:604017] | chromosome 18:31622247-31685836:-1 | OMIM:604017 | PMID:21108822 |
...
(9402 rows in total)
ODEX4all