Skip to content

Breed4Food: example SPARQL queries

Arnold Kuzniar edited this page Mar 2, 2017 · 37 revisions

Namespace prefixes

  • general-purpose vocabularies (prefixes can be looked up with the prefix.cc service)
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX dc: <http://purl.org/dc/elements/1.1/>
PREFIX dcterms: <http://purl.org/dc/terms/>
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
  • domain-specific
PREFIX obo: <http://purl.obolibrary.org/obo/>
PREFIX sio: <http://semanticscience.org/resource/>
PREFIX qtldb: <http://identifiers.org/pigQTLdb/>
PREFIX pubmed: <http://identifiers.org/pubmed/>
PREFIX faldo: <http://biohackathon.org/resource/faldo#>
PREFIX prosite: <http://purl.uniprot.org/prosite/>
PREFIX prints: <http://purl.uniprot.org/prints/>
PREFIX pirsf: <http://purl.uniprot.org/pirsf/>
PREFIX superfamily: <http://purl.uniprot.org/supfam/>
PREFIX tigrfam: <http://purl.uniprot.org/tigrfams/>
PREFIX pfam: <http://purl.uniprot.org/pfam/>
PREFIX smart: <http://purl.uniprot.org/smart/>
PREFIX ensembl: <http://rdf.ebi.ac.uk/resource/ensembl/>
PREFIX transcript: <http://rdf.ebi.ac.uk/resource/ensembl.transcript/>
PREFIX protein: <http://rdf.ebi.ac.uk/resource/ensembl.protein/>
PREFIX exon: <http://rdf.ebi.ac.uk/resource/ensembl.exon/>
PREFIX term: <http://rdf.ebi.ac.uk/terms/ensembl/>
PREFIX taxon: <http://identifiers.org/taxonomy/>

1. Count all QTLs in pig QTLdb.

# Count every resource typed obo:SO_0000771 (the class these examples use
# for QTLs) in the pig QTLdb graph.
PREFIX obo: <http://purl.obolibrary.org/obo/>

# The aggregate is wrapped in "( ... AS ?n)" as required by SPARQL 1.1;
# the bare "COUNT(*) AS ?n" form is only accepted by Virtuoso.
SELECT
   (COUNT(*) AS ?n)
FROM <http://www.animalgenome.org/QTLdb/pig>
WHERE {
   ?qtl a obo:SO_0000771
}
n
15905

2. Count QTLs associated with the trait of interest only (i.e. nipple quantity/teat number via VT/CMO ontologies).

# Count distinct QTLs linked through obo:RO_0002610 to one of the four
# VT/CMO terms that stand for the trait of interest.
PREFIX obo: <http://purl.obolibrary.org/obo/>

# Projection uses the standard "(expr AS ?var)" form instead of the
# Virtuoso-only bare "expr AS ?var".
SELECT
   (COUNT(DISTINCT ?qtl) AS ?n)
FROM <http://www.animalgenome.org/QTLdb/pig>
WHERE {
   ?qtl a obo:SO_0000771 ;
      obo:RO_0002610 ?trait .
   # DISTINCT above matters: a QTL matching several listed terms yields
   # one solution per term.
   FILTER(?trait IN (obo:VT_1000206, obo:CMO_0000445, obo:CMO_0000472, obo:CMO_0000473))
}
n
258

3. Count only those QTLs associated with the trait which overlap with protein-coding genes.

# Count distinct trait-associated QTLs that have at least one
# obo:SO_overlaps link.
# NOTE(review): the pattern accepts any obo:SO_overlaps target; that the
# graph only links protein-coding genes is not visible here - confirm.
PREFIX obo: <http://purl.obolibrary.org/obo/>

# Standard SPARQL 1.1 projection: "(expr AS ?var)".
SELECT
   (COUNT(DISTINCT ?qtl) AS ?n)
FROM <http://www.animalgenome.org/QTLdb/pig>
WHERE {
   ?qtl a obo:SO_0000771 ;
      obo:RO_0002610 ?trait ;
      obo:SO_overlaps ?gene .
   FILTER(?trait IN (obo:VT_1000206, obo:CMO_0000445, obo:CMO_0000472, obo:CMO_0000473))
}
n
173

4. Count only those QTLs associated with the trait which do NOT overlap with protein-coding genes.

# Count distinct trait-associated QTLs that have no obo:SO_overlaps link
# at all (the complement of the overlapping-QTL count).
PREFIX obo: <http://purl.obolibrary.org/obo/>

# Standard SPARQL 1.1 projection: "(expr AS ?var)".
SELECT
   (COUNT(DISTINCT ?qtl) AS ?n)
FROM <http://www.animalgenome.org/QTLdb/pig>
WHERE {
   ?qtl a obo:SO_0000771 ;
      obo:RO_0002610 ?trait .
   # MINUS shares ?qtl with the pattern above, so it removes every QTL
   # for which some overlapping resource exists.
   MINUS { ?qtl obo:SO_overlaps ?gene } .
   FILTER(?trait IN (obo:VT_1000206, obo:CMO_0000445, obo:CMO_0000472, obo:CMO_0000473))
}
n
85

5. Count QTLs across pig chromosomes (regardless of the trait of interest).

# Count distinct QTLs per chromosome, largest count first. The chromosome
# name is derived from the label of the resource the QTL is part of.
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX obo: <http://purl.obolibrary.org/obo/>

# The aggregate is projected as "(expr AS ?var)" (SPARQL 1.1); the bare
# "expr AS ?var" form only parses on Virtuoso.
SELECT
   ?chr
   (COUNT(DISTINCT ?qtl) AS ?n)
FROM <http://www.animalgenome.org/QTLdb/pig>
WHERE {
   ?qtl a obo:SO_0000771 ;
      obo:SO_part_of/rdfs:label ?chr_lb .
   # Drop the leading part of the label up to and including its last '.',
   # keeping only what follows it.
   BIND(replace(?chr_lb, '.+\\.', '') AS ?chr)
}
GROUP BY ?chr
ORDER BY DESC(?n)
chr n
1 2072
7 1732
2 1722
6 1397
4 1181
14 938
8 870
13 761
12 672
15 651
9 646
3 603
5 581
16 495
X 406
10 317
17 288
18 287
11 285
Y 1

6. List top 3 chromosomes with most QTLs associated with the trait.

# List the three chromosomes carrying the most distinct trait-associated
# QTLs.
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX obo: <http://purl.obolibrary.org/obo/>

# Standard SPARQL 1.1 projection: "(expr AS ?var)".
SELECT
   ?chr
   (COUNT(DISTINCT ?qtl) AS ?n)
FROM <http://www.animalgenome.org/QTLdb/pig>
WHERE {
   ?qtl a obo:SO_0000771 ;
      obo:SO_part_of/rdfs:label ?chr_lb ;
      obo:RO_0002610 ?trait .
   # Keep only the part of the label after its last '.'.
   BIND(replace(?chr_lb, '.+\\.', '') AS ?chr)
   FILTER(?trait IN (obo:VT_1000206, obo:CMO_0000445, obo:CMO_0000472, obo:CMO_0000473))
}
GROUP BY ?chr
ORDER BY DESC(?n)
# NOTE: ties on ?n are not broken, so the cut-off at 3 is arbitrary when
# counts are equal.
LIMIT 3
chr n
7 54
8 49
1 22

7. Count articles/PubMed IDs describing QTLs associated with the trait.

# Count the distinct references (dcterms:isReferencedBy targets) attached
# to trait-associated QTLs.
PREFIX obo: <http://purl.obolibrary.org/obo/>
PREFIX dcterms: <http://purl.org/dc/terms/>

# Standard SPARQL 1.1 projection: "(expr AS ?var)".
SELECT
   (COUNT(DISTINCT ?pmid) AS ?n)
FROM <http://www.animalgenome.org/QTLdb/pig>
WHERE {
   ?qtl a obo:SO_0000771 ;
      obo:RO_0002610 ?trait ;
      dcterms:isReferencedBy ?pmid .
   FILTER(?trait IN (obo:VT_1000206, obo:CMO_0000445, obo:CMO_0000472, obo:CMO_0000473))
}
n
20

8. List top 3 articles describing most QTLs associated with the trait.

# List the three references with the highest number of trait-associated
# QTL matches. Each reference is rendered as a Markdown link
# "[label](IRI)".
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX obo: <http://purl.obolibrary.org/obo/>
PREFIX dcterms: <http://purl.org/dc/terms/>

# Standard SPARQL 1.1 projection: "(expr AS ?var)".
# NOTE(review): COUNT(?qtl) counts solution rows, not distinct QTLs; a QTL
# matching several of the listed trait terms is counted once per term -
# confirm this is intended before switching to COUNT(DISTINCT ?qtl).
SELECT
   ?pubmed
   (COUNT(?qtl) AS ?n)
FROM <http://www.animalgenome.org/QTLdb/pig>
WHERE {
   ?qtl a obo:SO_0000771 ;
      obo:RO_0002610 ?trait ;
      dcterms:isReferencedBy ?pmid .
   ?pmid rdfs:label ?lb .
   # ?pmid is a resource, so it is converted with str() before being
   # passed to concat(), which is defined on string literals only.
   BIND(concat('[', ?lb,'](', str(?pmid), ')') AS ?pubmed)
   FILTER(?trait IN (obo:VT_1000206, obo:CMO_0000445, obo:CMO_0000472, obo:CMO_0000473))
}
GROUP BY ?pubmed
ORDER BY DESC(?n)
LIMIT 3
pubmed n
PMID:26830357 130
PMID:24981054 78
PMID:26202474 60

9. List pig breeds sorted by the number of trait-associated QTLs.

# List breeds ordered by the number of trait-associated QTL matches.
# QTL-to-breed links come from the pig QTLdb graph, breed names from the
# LBO ontology graph.
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX obo: <http://purl.obolibrary.org/obo/>

# The raw label is bound to ?breed_label so the projected ?breed_name does
# not rebind a variable already in scope (illegal in SPARQL 1.1); output
# column names are unchanged.
# NOTE(review): COUNT(?qtl) counts solution rows; a QTL matching several
# listed trait terms contributes one row per term - confirm whether
# COUNT(DISTINCT ?qtl) is wanted.
SELECT
   (str(?breed_label) AS ?breed_name)
   (COUNT(?qtl) AS ?n)
WHERE {
   GRAPH <http://www.animalgenome.org/QTLdb/pig> {
      ?qtl a obo:SO_0000771 ;
         obo:RO_0002610 ?trait ;
         obo:RO_0002162 ?breed .
      FILTER(?trait IN (obo:VT_1000206, obo:CMO_0000445, obo:CMO_0000472, obo:CMO_0000473))
   }
   GRAPH <http://purl.bioontology.org/ontology/LBO> {
      ?breed rdfs:label ?breed_label
   }
}
GROUP BY ?breed_label
ORDER BY DESC(?n)
breed_name n
Large White 310
Duroc 116
Meishan 66
Erhualian 56
Landrace 48
Pietrain 34
Yorkshire 4

10. Count unique genes (from Ensembl) in all QTLs associated with the trait.

# Count the distinct resources that trait-associated QTLs overlap
# (obo:SO_overlaps targets).
PREFIX obo: <http://purl.obolibrary.org/obo/>

# Standard SPARQL 1.1 projection: "(expr AS ?var)".
SELECT
   (COUNT(DISTINCT ?gene) AS ?n)
FROM <http://www.animalgenome.org/QTLdb/pig>
WHERE {
   ?qtl a obo:SO_0000771 ;
      obo:SO_overlaps ?gene ;
      obo:RO_0002610 ?trait .
   FILTER(?trait IN (obo:VT_1000206, obo:CMO_0000445, obo:CMO_0000472, obo:CMO_0000473))
}
n
10165

11. Count unique protein accessions (from UniProt) referenced in Ensembl.

# Count distinct accession values attached to resources typed
# <http://identifiers.org/uniprot> in the pig Ensembl graph.
PREFIX sio: <http://semanticscience.org/resource/>

# Standard SPARQL 1.1 projection: "(expr AS ?var)".
SELECT
   (COUNT(DISTINCT ?uniprot_acc) AS ?n)
FROM <http://www.ensembl.org/pig>
WHERE {
   # The accession value hangs off an intermediate blank node.
   ?s a <http://identifiers.org/uniprot> ;
      sio:SIO_000671 [ sio:SIO_000300 ?uniprot_acc ] .
   # Keep only values made up entirely of upper-case letters and digits.
   FILTER regex(?uniprot_acc, '^[A-Z0-9]+$')
}
n
25202

12. Count unique protein accessions (from UniProt) referenced via Ensembl and pig QTLdb for the trait of interest.

# Count distinct UniProt cross-references of proteins whose genes are
# overlapped by trait-associated QTLs. The two graphs are joined on ?gene.
PREFIX obo: <http://purl.obolibrary.org/obo/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>

# Standard SPARQL 1.1 projection: "(expr AS ?var)".
SELECT
   (COUNT(DISTINCT ?xref) AS ?n)
WHERE {
   GRAPH <http://www.ensembl.org/pig> {
      ?transcript obo:SO_transcribed_from ?gene ;
         obo:SO_translates_to ?protein .
      ?protein rdfs:seeAlso ?xref .
      # regex() is defined on string literals, so the cross-reference is
      # converted with str() first; the pattern itself is unchanged.
      FILTER regex(str(?xref), 'http://identifiers.org/uniprot')
   }
   GRAPH <http://www.animalgenome.org/QTLdb/pig> {
      ?qtl a obo:SO_0000771 ;
         obo:SO_overlaps ?gene ;
         obo:RO_0002610 ?trait .
      FILTER(?trait IN (obo:VT_1000206, obo:CMO_0000445, obo:CMO_0000472, obo:CMO_0000473))
   }
}
n
11667

13. List top 3 QTLs associated with the trait and with most genes.

# List the three trait-associated QTLs with the highest number of
# overlapping-gene matches. Each QTL is rendered as a Markdown link
# "[label](source)".
# The rdfs: prefix is declared here because the query uses rdfs:label; the
# original relied on the endpoint predefining it.
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX obo: <http://purl.obolibrary.org/obo/>
PREFIX dcterms: <http://purl.org/dc/terms/>

# Standard SPARQL 1.1 projection: "(expr AS ?var)".
# NOTE(review): COUNT(?gene) counts solution rows; a QTL matching several
# listed trait terms (or a parent with several labels) repeats each gene -
# confirm whether COUNT(DISTINCT ?gene) is wanted.
SELECT
   ?qtldb
   (COUNT(?gene) AS ?n)
FROM <http://www.animalgenome.org/QTLdb/pig>
WHERE {
   # ?chr is not projected; the pattern is kept so the set of matching
   # solutions stays exactly as before.
   ?qtl a obo:SO_0000771 ;
      rdfs:label ?qtl_id ;
      dcterms:source ?db ;
      obo:SO_overlaps ?gene ;
      obo:SO_part_of/rdfs:label ?chr ;
      obo:RO_0002610 ?trait .
   # str() makes ?db a valid concat() argument even when it is an IRI.
   BIND(concat('[', ?qtl_id, '](', str(?db), ')') AS ?qtldb)
   FILTER(?trait IN (obo:VT_1000206, obo:CMO_0000445, obo:CMO_0000472, obo:CMO_0000473))
}
GROUP BY ?qtldb
ORDER BY DESC(?n)
LIMIT 3
qtldb n
QTL:5226 3034
QTL:5223 2546
QTL:5257 2340

14. List genes in QTLs associated with the trait.

# List the distinct (QTL, gene symbol) pairs for trait-associated QTLs.
# QTL-to-gene links come from the pig QTLdb graph, gene labels from the
# pig Ensembl graph.
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX dcterms: <http://purl.org/dc/terms/>
PREFIX obo: <http://purl.obolibrary.org/obo/>

# "SELECT DISTINCT" replaces the Virtuoso-only "DISTINCT(?qtldb) AS ?qtldb";
# the raw label is bound to ?gene_label so the projected ?gene_symbol does
# not rebind an in-scope variable. Output column names are unchanged.
SELECT DISTINCT
   ?qtldb
   (str(?gene_label) AS ?gene_symbol)
WHERE {
   GRAPH <http://www.animalgenome.org/QTLdb/pig> {
      ?qtl a obo:SO_0000771 ;
         dcterms:source ?db ;
         rdfs:label ?qtl_id ;
         obo:SO_overlaps ?gene ;
         obo:RO_0002610 ?trait .
      # Markdown link "[label](source)"; str() makes ?db a valid concat()
      # argument even when it is an IRI.
      BIND(concat('[', ?qtl_id, '](', str(?db), ')') AS ?qtldb) .
      FILTER(?trait IN (obo:VT_1000206, obo:CMO_0000445, obo:CMO_0000472, obo:CMO_0000473))
   }
   GRAPH <http://www.ensembl.org/pig> {
      ?gene rdfs:label ?gene_label
   }
}
ORDER BY ?qtldb
qtldb gene_symbol
QTL:1100 PRMT9
QTL:1100 DKK2
QTL:1100 LNX1
...

(12856 rows in total)

15. List trait-associated QTLs whose overlapping pig genes have human orthologs referenced in OMIM (the output includes the human ortholog and the chromosomal locations of both genes, each linked to the Ensembl genome browser).

# List trait-associated QTLs together with each overlapping pig gene, its
# human ortholog, both gene locations, the OMIM entry of the ortholog and
# the QTL's reference. Four graphs are joined: pig QTLdb (QTL -> gene),
# pig Ensembl (gene -> ortholog), human Ensembl (ortholog -> OMIM) and
# the Bio2RDF OMIM graph (OMIM identifier).
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX dcterms: <http://purl.org/dc/terms/>
PREFIX obo: <http://purl.obolibrary.org/obo/>
PREFIX sio: <http://semanticscience.org/resource/>
PREFIX faldo: <http://biohackathon.org/resource/faldo#>

# Changes against the Virtuoso-only original (output columns unchanged):
#  - "SELECT DISTINCT" plus "(expr AS ?var)" projections per SPARQL 1.1;
#  - raw location labels are bound to ?pig_loc_lb / ?human_loc_lb and the
#    ortholog label to ?ortholog_lb, so no projection rebinds a variable
#    that is already in scope;
#  - the standard ucase() replaces the Virtuoso built-in bif:upper();
#  - resources are converted with str() before being passed to concat().
SELECT DISTINCT
   ?qtldb
   (str(?gene_symbol) AS ?pig_gene)
   # Location cells are Markdown links into the Ensembl location view; the
   # replace() strips the text matching 'chr.+ ' from the label to form
   # the "r=" region parameter.
   (concat('[', ?pig_loc_lb, '](http://www.ensembl.org/Sus_scrofa/Location/View?r=', replace(?pig_loc_lb, 'chr.+ ', ''), ')') AS ?gene_loc)
   (str(?ortholog_lb) AS ?human_ortholog)
   (concat('[', ?human_loc_lb, '](http://www.ensembl.org/Homo_sapiens/Location/View?r=', replace(?human_loc_lb, 'chr.+ ', ''), ')') AS ?ortholog_loc)
   (concat('[', ucase(str(?id)), '](', str(?bio2rdf_omim), ')') AS ?omim)
   (concat('[', ?lb,'](', str(?pmid), ')') AS ?pubmed)
WHERE {
   GRAPH <http://www.animalgenome.org/QTLdb/pig> {
      # ?chr is not projected; the pattern is kept so the set of matching
      # solutions stays exactly as before.
      ?qtl a obo:SO_0000771 ;
         dcterms:source ?db ;
         rdfs:label ?qtl_id ;
         dcterms:isReferencedBy ?pmid ;
         obo:SO_part_of ?chr ;
         obo:SO_overlaps ?gene ;
         obo:RO_0002610 ?trait .
      ?pmid rdfs:label ?lb .
      FILTER(?trait IN (obo:VT_1000206, obo:CMO_0000445, obo:CMO_0000472, obo:CMO_0000473)) .
      BIND(concat('[', ?qtl_id, '](', str(?db), ')') AS ?qtldb)
   }
   GRAPH <http://www.ensembl.org/pig> {
      ?gene rdfs:label ?gene_symbol ;
         sio:SIO_000558 ?ortholog ;
         faldo:location/rdfs:label ?pig_loc_lb
   }
   GRAPH <http://www.ensembl.org/human> {
      ?ortholog obo:RO_0002331 ?bio2rdf_omim ;
         rdfs:label ?ortholog_lb ;
         faldo:location/rdfs:label ?human_loc_lb
   }
   GRAPH <http://bio2rdf.org/omim_resource:bio2rdf.dataset.omim.R4> {
      ?bio2rdf_omim dcterms:identifier ?id
   }
}
ORDER BY ?qtldb
qtldb pig_gene gene_loc human_ortholog ortholog_loc omim pubmed
QTL:17547 HDAC1 chromosome 6:82882051-82908171:-1 HDAC1 [omim:601241] chromosome 1:32292086-32333635:1 OMIM:601241 PMID:21108822
QTL:17547 HEYL chromosome 6:88530827-88547931:-1 HEYL [omim:609034] chromosome 1:39624153-39639945:-1 OMIM:609034 PMID:21108822
QTL:17547 B4GALT6 chromosome 6:108153077-108219519:-1 B4GALT6 [omim:604017] chromosome 18:31622247-31685836:-1 OMIM:604017 PMID:21108822

...

(9402 rows in total)