-
Notifications
You must be signed in to change notification settings - Fork 4
/
convert.py
executable file
·1418 lines (1148 loc) · 44.3 KB
/
convert.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#!/usr/bin/env python
# * Use unidecode to make skos terms from labels (in uatbridge)
"""
A script to convert the CSV input format to various outputs.
Dependencies: python3, python3-rdflib, skosify (not packaged yet; see
https://pypi.org/project/skosify/)
See Appendix A of Vocabularies in the VO 2 for what this is and what
it's for.
This program is in the public domain.
In case of problems, please contact Markus Demleitner
"""
from configparser import ConfigParser
from xml.etree import ElementTree as etree
import contextlib
import csv
import itertools
import json
import os
import re
import subprocess
import textwrap
import shutil
import sys
import urllib.parse
import weakref
import rdflib
try:
import skosify
from rdflib.term import URIRef
except ImportError:
sys.stderr.write("skosify and/or rdflib python modules missing;"
" this will break as soon as SKOS vocabularies are processed.\n")
# Minimal required keys for a vocabulary construction
VOCABULARY_MANDATORY_KEYS = frozenset([
    "name", "timestamp", "title", "description", "authors"])

# this is defined in Vocabularies in the VO 2
KNOWN_PREDICATES = frozenset([
    "ivoasem:preliminary", "ivoasem:deprecated", "ivoasem:useInstead",
    "rdfs:subClassOf",
    "rdfs:subPropertyOf",
    "skos:broader", "skos:exactMatch",
    # well, this one isn't quite in VocInVO2 in late 2020. Let's see.
    "skos:related",
    # ...and neither is this (which we need for facilities)
    "skos:altLabel"])

# an RE our term URIs must match (we're not very diligent yet).
# These are raw strings: in a plain literal, "\w" and "\d" are invalid
# escape sequences and raise a SyntaxWarning from Python 3.12 on.
FULL_TERM_PATTERN = r"[\w\d#:/_.*%-]+"

# an RE our terms themselves must match
TERM_PATTERN = r"[\w\d_-]+"

IVOA_RDF_URI = "http://www.ivoa.net/rdf/"
# Template for the vocabulary directory's .htaccess; filled with
# install_base, timestamp and name in (presumably) the build code
# further down — TODO confirm against the writer functions.
HT_ACCESS_TEMPLATE_CSV = """# .htaccess for content negotiation
# This file is patterned after Recipe 3 in the W3C document 'Best
# Practice Recipes for Publishing RDF Vocabularies', at
# <http://www.w3.org/TR/swbp-vocab-pub/>
AddType application/rdf+xml .rdf
AddType text/turtle .ttl
AddType application/x-desise+json .desise
AddCharset UTF-8 .ttl
AddCharset UTF-8 .html
AddCharset UTF-8 .desise
RewriteEngine On
RewriteBase {install_base}
RewriteCond %{{HTTP_ACCEPT}} application/rdf\\+xml
RewriteRule ^$ {timestamp}/{name}.rdf [R=303]
RewriteCond %{{HTTP_ACCEPT}} text/turtle
RewriteRule ^$ {timestamp}/{name}.ttl [R=303]
RewriteCond %{{HTTP_ACCEPT}} application/x-desise\\+json
RewriteRule ^$ {timestamp}/{name}.desise [R=303]
# No accept conditions: make the .html version the default
RewriteRule ^$ {timestamp}/{name}.html [R=303]
"""

# Header of the generated Turtle files; the format() fields are filled
# from Vocabulary.get_meta_dict() plus a "creators" key (see
# Vocabulary.write_turtle).  Note the doubled braces that survive
# format() as literal braces would not be needed here, as there are none.
TTL_HEADER_TEMPLATE = """@base {baseuri}.
@prefix : <#>.
@prefix dc: <http://purl.org/dc/terms/> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix owl: <http://www.w3.org/2002/07/owl#> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix foaf: <http://xmlns.com/foaf/0.1/>.
@prefix ivoasem: <http://www.ivoa.net/rdf/ivoasem#>.
@prefix skos: <http://www.w3.org/2004/02/skos/core#>.
<> a owl:Ontology;
dc:created {timestamp};
dc:creator {creators};
dc:license {licenseuri};
rdfs:label {title}@en;
dc:title {title}@en;
dc:description {description};
ivoasem:vocflavour {flavour}.
dc:created a owl:AnnotationProperty.
dc:creator a owl:AnnotationProperty.
dc:title a owl:AnnotationProperty.
dc:description a owl:AnnotationProperty.
"""

# Inline javascript for the generated HTML pages (currently none).
JAVASCRIPT = """
"""

# Inline stylesheet for the generated HTML pages.
CSS_STYLE = """
html {
font-family: sans;
}
h1 {
margin-bottom: 3ex;
border-bottom: 2pt solid #ccc;
}
tr {
padding-top: 2pt;
padding-bottom: 2pt;
border-bottom: 1pt solid #ccc;
}
tr:target {
border: 2pt solid yellow;
}
thead tr {
border-top: 1pt solid black;
border-bottom: 1pt solid black;
}
th {
padding: 4pt;
}
.intro {
max-width: 30em;
margin-bottom: 5ex;
margin-left: 2ex;
}
.outro {
max-width: 30em;
margin-top: 4ex;
}
table {
border-collapse: collapse;
border-bottom: 1pt solid black;
}
tr {
padding: 5pt;
}
td {
vertical-align: top;
padding: 5pt;
}
th:nth-child(1),
td:nth-child(1) {
background: #eef;
}
th:nth-child(3),
td:nth-child(3) {
background: #eef;
}
th:nth-child(5),
td:nth-child(5) {
background: #eef;
}
.draftwarning {
border-left: 3pt solid red;
padding-left: 6pt;
}
ul.compactlist {
list-style-type: none;
padding-left: 0pt;
margin-block-start: 0pt;
margin-block-end: 0pt;
}
ul.compactlist li {
margin-bottom: 0.3ex;
}
label.popup {
position: relative;
}
input.popup-control {
display: none;
}
.popup-head {
display: inline-block;
}
.popup-body {
display: none;
}
.popup-control:checked ~ .popup-body {
display: block;
position: absolute;
top: 0pt;
left: 0pt;
background: white;
border: 1pt solid #555;
z-index: 500;
padding: 0.4rem 0.2rem;
width: 20rem;
}
.proplabel {
display: inline-block;
position: relative;
background: #442266;
color: white;
padding: 0.4rem 0.4rem;
border-radius: 0.2em;
white-space: nowrap;
margin: 0.5rem 0.2rem;
}
#license {
margin-top: 2rem;
background-color: #ccc;
padding: 0.5rem;
font-size: 80%;
}
/* to a bit lighter with link underlines: we have a high link density
in our documents (and I don't care if this is a no-op on old browsers */
td a {
text-decoration-color: transparent;
}
td a:hover {
text-decoration-color: currentcolor;
transition: all 0.2s ease-in;
}
"""
# Default license blurb reproduced verbatim in the generated HTML.
# The CC-0 link previously had an empty href; point it at the CC0 deed,
# matching the licenseuri default in Vocabulary.__init__.
DEFAULT_LICENSE_HTML = """This vocabulary is made available under
<a href="http://creativecommons.org/publicdomain/zero/1.0/">CC-0</a> by the <a
href="https://wiki.ivoa.net/twiki/bin/view/IVOA/IvoaSemantics">IVOA
Semantics Working Group</a>. To learn how to improve and amend this
vocabulary, see <a href="http://ivoa.net/documents/Vocabularies/20200326/"
>Vocabularies in the VO 2</a>."""
class ReportableError(Exception):
    """is raised for expected and explainable error conditions.

    All other exceptions lead to tracebacks for further debugging.
    """
############ some utility functions
@contextlib.contextmanager
def work_dir(dir_name, clear_first=False):
    """a context manager for temporarily working in dir_name.

    dir_name, if non-existing, is created.  If clear_first=True is passed,
    the directory will be removed and re-created.

    The previous working directory is restored on exit, whether or not
    the managed block raised.
    """
    if clear_first and os.path.isdir(dir_name):
        shutil.rmtree(dir_name)
    # exist_ok avoids the race between a separate isdir() test and
    # makedirs() (and also creates missing intermediate directories).
    os.makedirs(dir_name, exist_ok=True)
    owd = os.getcwd()
    os.chdir(dir_name)
    try:
        yield
    finally:
        os.chdir(owd)
def is_URI(s):
    """returns True if we believe s is a URI.

    This is a simple, RE-based heuristic: s counts as a URI when it
    starts with something scheme-like followed by ://, or with a
    fragment marker #.
    """
    return re.match("[a-zA-Z]+://|#", s) is not None
def append_with_sep(l, item, sep):
    """appends item to l, preceding it with sep if l is non-empty.

    This lets one emulate the ", ".join pattern for non-string lists.
    """
    l.extend([sep, item] if l else [item])
def pick_exactly_one(iter, errmsg, default=None):
    """returns the element in iter when there is only one.

    It raises an error with errmsg as an explanation otherwise.

    If default is non-None, it will be returned in case of an empty iter.
    """
    items = list(iter)
    n_found = len(items)
    if n_found == 1:
        return items[0]
    if n_found == 0 and default is not None:
        return default
    # covers both "none found" (without default) and "too many found";
    # the message text is identical to spelling the cases out.
    raise ReportableError("Expected exactly one {} but got {}".format(
        errmsg, n_found))
def _expand_transitively(rn, cur_term, to_process):
    """helps close_transitively.

    See the explanation of the strategy there.

    rn maps terms to lists of their narrower terms; to_process is the
    set of terms not yet expanded.  cur_term's subtree is expanded
    post-order: children still in to_process are expanded first, and
    each child's (then complete) narrower list is folded into
    cur_term's list.
    """
    for narrower_term in rn.get(cur_term, []):
        if narrower_term in to_process:
            _expand_transitively(rn, narrower_term, to_process)
            to_process.remove(narrower_term)
        # NOTE: this appends to the very list being iterated over, so the
        # loop also visits transitively added terms (already expanded at
        # that point).  NOTE(review): on chains deeper than two levels
        # this looks like it can add duplicate entries — confirm whether
        # duplicates are acceptable downstream.
        rn[cur_term].extend(rn.get(narrower_term, []))
def close_transitively(raw_narrower):
    """closes raw_narrower transitively (in place).

    raw_narrower is a dict of lists; for every item i in a value list,
    that list is expanded by raw_narrower[i].

    This helps add_desise_narrowser in the case of non-SKOS vocabularies;
    it will not do anything sensible if d doesn't describe a tree.  In
    particular, it will not detect cycles and may go down in flames if
    there are any.
    """
    # our strategy: Pick a term to process and expand it and the subtree
    # below it post-order, removing anything visited from our to-do list.
    # Repeat until we're done.
    to_process = set(raw_narrower)
    while to_process:
        _expand_transitively(raw_narrower, to_process.pop(), to_process)
def invert_wider(voc):
    """returns the inverse of the wider relationship on voc.

    This is either the simple inversion of wider in the SKOS
    case (where arbitrary graphs are possible and wider isn't transitive
    anyway) or its transitive closure (i.e., all terms reachable from t
    when following the branches).
    """
    narrower_map = {}
    for term_name, term in voc.terms.items():
        for wider_name in term.get_objects_for(voc.wider_predicate):
            # in-vocabulary objects come as #-prefixed fragments; strip
            # the marker to get at the plain term name
            narrower_map.setdefault(
                wider_name.lstrip("#"), []).append(term_name)
    if voc.flavour not in ["SKOS", "SKOS CSV"]:
        close_transitively(narrower_map)
    return narrower_map
############ tiny DOM start (snarfed and simplified from DaCHS stanxml)
# (used to write HTML)
class _Element(object):
    """An element within a DOM.

    Essentially, this is a simple way to build elementtrees.  You can
    reach the embedded elementtree Element as node.

    Add elements, sequences, etc, using indexation, attributes using function
    calls; names with dashes are written with underscores, python
    reserved words have a trailing underscore.
    """
    # the type of a generator expression; kept for isinstance-style
    # dispatch (currently generators are caught via __iter__ below)
    _generator_t = type((x for x in ()))

    def __init__(self, name):
        # name: the element's tag name
        self.node = etree.Element(name)

    def add_text(self, tx):
        """appends tx to the end of the current content.
        """
        # etree keeps trailing text in the tail of the last child (if
        # there is one), else in the element's own text
        if len(self.node):
            self.node[-1].tail = (self.node[-1].tail or "")+tx
        else:
            self.node.text = (self.node.text or "")+tx

    def __getitem__(self, child):
        # el[child] appends child (string, number, _Element, etree
        # element, or any iterable of these, recursively) and returns
        # self so indexations can be chained.
        if child is None:
            return
        elif isinstance(child, str):
            self.add_text(child)
        elif isinstance(child, (int, float)):
            self.add_text(str(child))
        elif isinstance(child, _Element):
            self.node.append(child.node)
        elif isinstance(child, etree.Element):
            self.node.append(child)
        elif hasattr(child, "__iter__"):
            for c in child:
                self[c]
        else:
            raise Exception("%s element %s cannot be added to %s node"%(
                type(child), repr(child), self.node.tag))
        return self

    def __call__(self, **kwargs):
        # el(attr=value, ...) sets attributes and returns self;
        # trailing underscores are stripped (class_ -> class), other
        # underscores become dashes (http_equiv -> http-equiv)
        for k, v in kwargs.items():
            if k.endswith("_"):
                k = k[:-1]
            k = k.replace("_", "-")
            self.node.attrib[k] = v
        return self

    def dump(self, encoding="utf-8", dest_file=sys.stdout):
        # NOTE(review): with encoding="utf-8" etree writes bytes, which
        # will fail on the text-mode sys.stdout default — confirm callers
        # always pass a binary file.
        etree.ElementTree(self.node).write(
            dest_file, encoding=encoding)
class _T(object):
    """a very simple templating engine.

    Essentially, you get HTML elements by saying T.elementname, and
    you'll get an _Element with that tag name.

    This is supposed to be instantiated to a singleton (here, T).
    """
    def __getattr__(self, key):
        # every attribute access creates a fresh element
        return _Element(key)

# the singleton element factory used throughout the HTML generation
T = _T()
############ The term class and associated code
def make_ttl_literal(ob):
    """returns a turtle literal for an object.

    Really, at this point only strings and booleans are supported.
    However, if something looks like a URI (see is_URI), it's going to
    be treated as a URI; should we have an extra class for that?
    """
    # booleans first: bool is a subclass of int, and turtle has
    # dedicated tokens for them
    if isinstance(ob, bool):
        return "true" if ob else "false"
    if not isinstance(ob, str):
        raise ValueError(f"Cannot make a literal from: {ob}")

    if is_URI(ob):
        return "<{}>".format(ob)
    if re.match(r"\w+:\w+", ob):
        # TTL prefixed IRI, restricted to what we want to see.
        return ob
    if "\n" in ob:
        # multi-line literal; no escaping done here
        return '"""{}"""'.format(ob)
    return '"{}"'.format(ob.replace('"', '\\"'))
class Term(object):
    """A term in our vocabulary.

    Terms are constructed with the vocabulary the term is in
    and the items from the CSV as per Appendix A, except that
    parent terms are already resolved by the CSV parser.

    self.relations is a set of pairs of (predicate, object),
    where None in object is a blank node.
    """
    def __init__(self,
            vocabulary,
            term,
            label,
            description,
            parent=None,
            more_relations=None):
        # terms become URI fragments, hence the pattern constraint
        if not re.match(TERM_PATTERN+"$", term):
            raise ReportableError("Term fragment {} does not match IVOA"
                " constraints.".format(term))
        self.relations = set([])
        # weakref proxy: avoids a reference cycle vocabulary <-> term
        self.vocabulary = weakref.proxy(vocabulary)
        self.term, self.label = term, label
        self.description = description
        # all terms of a draft vocabulary are marked preliminary
        if self.vocabulary.draft:
            self._add_relation("ivoasem:preliminary", None)
        if parent:
            self._set_parent_term(parent)
        if more_relations:
            try:
                self._parse_relations(more_relations)
            except Exception as msg:
                raise ReportableError("While parsing relations of"
                    f" {term}: {msg}")
        if not self.term:
            raise ValueError("Term with empty identifier")
        if not self.label.strip():
            raise ValueError(f"Term {self.term} has no label")

    def _add_relation(self, predicate, object):
        """adds a relation (self, predicate, object).

        This does some additional validation on what predicate is
        and thus should always be used in preference to directly
        appending to relations.
        """
        if not predicate in KNOWN_PREDICATES:
            raise ReportableError("Unknown predicate in ({}, {}, {})"
                .format(self.term, predicate, object))
        self.relations.add((predicate, object))

    def _set_parent_term(self, parent_term):
        """adds a triple declaring parent_term as "wider".

        The predicate here depends on the vocabulary flavour.

        There is a special case here for skos; there, parent_term
        can be a list, and a term can have multiple parents.
        """
        if (self.vocabulary.wider_predicate=="skos:broader"
                and isinstance(parent_term, list)):
            for term in parent_term:
                self._add_relation(
                    self.vocabulary.wider_predicate, term)
        else:
            self._add_relation(
                self.vocabulary.wider_predicate, parent_term)

    @staticmethod
    def _iter_relationship_literals(relations):
        """yields pairs of (predicate, object) for our relationship
        input format.

        That's a space-separated sequence of either predicate names or
        predicate-name(object-spec) specifications, where object-spec
        has balanced parentheses.

        The actual interpretation of object-spec happens in _parse_relations
        and by is_URI.  This should probably be improved to be less
        ad-hoc.

        And if our grammar gets any more complex, we should use a proper
        parser generator.
        """
        # hand-written parser: tokens are "(", ")", or runs of anything
        # else; token_stack is None while outside of an object-spec
        predicate, token_stack = None, None
        for mat in re.finditer(r"\(|\)|[^()]+", relations):
            token = mat.group(0).strip()
            if predicate is None:
                if not re.match(FULL_TERM_PATTERN+"$", token):
                    raise ValueError("Invalid predicate at {}: {}".format(
                        mat.start(), token))
                predicate = token
            else:
                # we have a predicate...
                if token_stack is None:
                    # ...and are not parsing an argument
                    if token=='(':
                        token_stack = []
                    elif token==')':
                        raise ValueError("Unexpected ) at {}".format(
                            mat.start()))
                    else:
                        # current predicate has no object
                        yield predicate, None
                        if not re.match(FULL_TERM_PATTERN+"$", token):
                            raise ValueError(
                                "Invalid predicate at {}: {}"
                                .format(mat.start(), token))
                        predicate = token
                else:
                    # ...we are parsing argument
                    if token=='(':
                        token_stack.append(token)
                    elif token==')':
                        arg = token_stack.pop()+')'
                        if token_stack:
                            # still nested: fold into the enclosing spec
                            token_stack[-1] += arg
                        else:
                            # argument complete, reset parser
                            yield predicate, arg[:-1]
                            predicate, token_stack = None, None
                    else:
                        # don't discard whitespace here
                        token_stack.append(mat.group())
        if predicate:
            # a trailing predicate that never got an object spec
            yield predicate, None

    def _parse_relations(self, relations):
        """adds relations passed in through the last column of our CSV.

        This parses {predicate[(object)]}.
        """
        for predicate, obj in self._iter_relationship_literals(relations):
            # a little hack: URI-fy plain objects by making them part of
            # the current vocabulary
            if obj and re.match(TERM_PATTERN+"$", obj):
                obj = "#"+obj
            self._add_relation(predicate, obj)

    def get_objects_for(self, predicate):
        """yields term names for which (predicate term) is in
        relationships.
        """
        for pred, term in self.relations:
            if pred==predicate:
                yield term

    def as_ttl(self):
        """returns a turtle representation of this term in a string.
        """
        fillers = {
            "term": self.term,
            "label": make_ttl_literal(self.label),
            "comment": make_ttl_literal(self.description or "N/D"),
            "term_type": self.vocabulary.term_class,
            "label_property": self.vocabulary.label_property,
            "description_property": self.vocabulary.description_property,
        }
        template = [
            "<#{term}> a {term_type}",
            "{label_property} {label}",
            "{description_property} {comment}"]
        for predicate, object in self.relations:
            if object is None:
                # presumably the file-local blank node label — TODO
                # confirm against how :__ is declared elsewhere
                object = ":__"
            template.append("{} {}".format(
                predicate,
                make_ttl_literal(object)))
        return ";\n ".join(template).format(**fillers)+"."

    def _format_term_as_html(self, term):
        """returns HTML for a term.

        This is going to be a link if the term exists in the parent
        vocabulary, or, for now, just the term.

        Passing in None (the blank node) is ok, too.  You'll get back None.
        """
        if term is None:
            return term
        if is_URI(term):
            return T.a(href=term)[term]
        if term[0]=='#':
            # an in-vocabulary reference; link it if we know the term
            term = term[1:]
        if term in self.vocabulary.terms:
            return T.a(href="#"+term)[term]
        else:
            return term

    def _format_more_relations(self):
        """yields HTML elements for the non-parent relationships
        this term has.

        We only select the relationships VocInVO2 talks about.
        """
        for prop, label in [
                ("ivoasem:useInstead", "Use Instead"),
                ("ivoasem:deprecated", "Deprecated Term"),
                ("skos:exactMatch", "Same As"),
                ("skos:related", "Related"),
                ("built-in:narrower", "Narrower")]:
            if prop=="built-in:narrower":
                # narrower isn't stored on the term; it is computed by
                # inverting wider on the vocabulary level
                objs = [self._format_term_as_html(t)
                    for t in sorted(self.vocabulary.inverted_wider.get(
                        self.term, []))]
            else:
                objs = [self._format_term_as_html(ob)
                    for ob in self.get_objects_for(prop)]
            if objs:
                # we have the property...
                non_nulls = [o for o in objs if o is not None]
                if non_nulls:
                    # ...and the property has non-blank objects:
                    # render a checkbox-driven popup with the object list
                    yield T.label(class_="popup")[
                        T.input(type="checkbox", class_="popup-control"),
                        T.span(class_="popup-head proplabel")[
                            label],
                        T.div(class_="popup-body")[
                            T.ul(class_="compactlist")[[
                                T.li[obj] for obj in objs]]]]
                else:
                    # ...and the property only has blank nodes as objects
                    yield T.span(class_="proplabel")[label]

    def get_url(self):
        """returns this term's full RDF URI.
        """
        return self.vocabulary.baseuri+"#"+self.term

    def as_html(self):
        """returns elementtree for an HTML table line for this term.
        """
        # a blank-node object on these predicates marks the plain flag
        preliminary = ("ivoasem:preliminary", None) in self.relations
        deprecated = ("ivoasem:deprecated", None) in self.relations
        formatted_relations = []
        for rel in self._format_more_relations():
            append_with_sep(formatted_relations, rel, T.br)
        if preliminary:
            row_class = "preliminary"
        elif deprecated:
            row_class = "deprecated"
        else:
            row_class = "term"
        parents = []
        for name in self.get_objects_for(self.vocabulary.wider_predicate):
            append_with_sep(parents, self._format_term_as_html(name), ", ")
        el = T.tr(class_=row_class, id=self.term)[
            T.td(class_="term")[
                T.a(title="Copy the link URL for this term's RDF URI",
                    href=self.get_url())[self.term],
                " (Preliminary)" if preliminary else "",
                " (Deprecated)" if deprecated else ""],
            T.td(class_="label")[self.label],
            T.td(class_="description")[self.description],
            T.td(class_="parent")[parents],
            T.td(class_="morerels")[formatted_relations],]
        return el
########### Vocabulary classes
# They do a bit much right now (parsing, managing, writing); we may
# want to refactor that and have vocabulary parsers and writers; but
# then the way this is built they aren't really independent, and so
# there's not much to be gained except smaller classes.
class Vocabulary(object):
"""The base class of Vocabularies.
Vocabularies are constructed with the keys from vocabs.conf in a
dictionary (which then show up in attributes). See
VOCABULARY_MANDATORY_KEYS for the minimal required keys.
The attributes you can rely on here are:
* baseuri: the vocabulary URI (the terms will be baseuri#term)
* name: the vocabulary name; this must should consist of lowercase
letters and underscores only. Legacy vocabularies may use uppercase
letters, too.
* path: local path segments after rdf/. This should only be given
for legacy vocabularies and otherwise is just name.
* filename: the name of the source file (should only be given if
not <path>/terms.csv
* timestamp, description, authors, title: as in vocabs.conf
* draft: true if there's a key draft in vocabs.conf
* terms: a dictionary of the terms as strings to the respective Term
instances.
* licenseuri: a license URI. Only use for externally managed
vocabularies; IVOA vocabularies are always CC-0.
* hidden: if True, no META.INF is being written (meaning:
the vocabulary will not show up in the repo).
* licensehtml: a human-readable license text that is reproduced
verbatim in HTML. Again, only use for externally managed vocabularies.
* topconcepts: space-separated identifiers that are declared as SKOS
top concepts.
To derive a subclass, you need to define:
* term_class -- the class of terms in this vocabulary
* wider_predicate -- the predicate to link a term to its parent
* label_property -- the predicate to assign a human-readable label
to a term
* description_property -- the predicate to assign a human-readable
definition to a term
* flavour -- a string that becomes the object to ivoasem:vocflavour
"""
def __init__(self, meta):
    # meta: a dict with the keys from vocabs.conf; must at least
    # contain VOCABULARY_MANDATORY_KEYS.
    missing_keys = VOCABULARY_MANDATORY_KEYS-set(meta)
    if missing_keys:
        raise ReportableError("Vocabulary definition for {} incomplete:"
            " {} missing.".format(
                meta.get("name", "<unnamed>"),
                ", ".join(missing_keys)))
    # pop rather than get: draft/hidden become booleans here and must
    # not be overwritten by the setattr loop below.
    # NOTE(review): pop mutates the caller's dict — confirm callers
    # don't reuse meta.
    self.draft = bool(meta.pop("draft", False))
    self.hidden = bool(meta.pop("hidden", False))
    # path defaults to the name; legacy vocabularies may override it
    path = meta.get("path", meta["name"])
    defaults = {
        "path": path,
        "baseuri": IVOA_RDF_URI+path,
        "filename": os.path.join(path, "terms.csv"),
        "licensehtml": DEFAULT_LICENSE_HTML,
        "licenseuri":
            "http://creativecommons.org/publicdomain/zero/1.0/",
        "topconcepts": "",
    }
    # explicit meta keys win over the computed defaults
    defaults.update(meta)
    meta = defaults
    # all remaining meta items become instance attributes
    for key, value in meta.items():
        setattr(self, key, value)
    self._load_terms()
    self.inverted_wider = invert_wider(self)
def _read_terms_source(self):
    """must add a terms attribute to self containing Term instances.

    This needs to be overridden in derived classes.
    """
    raise NotImplementedError("Base vocabularies cannot parse;"
        " use a derived class")
def _load_terms(self):
    """arranges for the term attribute to be created.

    If you want to read something else than our custom CSV,
    override the _read_terms_source method, not this one; this
    method may do some additional validation useful for all
    classes of vocabularies.

    Raises ReportableError when self.filename cannot be read.
    """
    try:
        # just see whether the file is readable.
        with open(self.filename, "rb") as f:
            _ = f.read(10)
    except IOError as ex:
        # self.filename already is the complete file name; the old
        # message appended a spurious ".terms" to it.
        raise ReportableError(
            "Expected terms file {} cannot be read: {}".format(
                self.filename, ex))
    self._read_terms_source()
def get_meta_dict(self):
    """returns the common meta items of this vocabulary as
    a str->str dictionary.

    These are the keys available to the various output templates
    (see, e.g., TTL_HEADER_TEMPLATE); all attributes read here are
    set in __init__.
    """
    return {
        "baseuri": self.baseuri,
        "name": self.name,
        "path": self.path,
        "timestamp": self.timestamp,
        "description": self.description,
        "authors": self.authors,
        "title": self.title,
        "flavour": self.flavour,
        "licenseuri": self.licenseuri}
def write_turtle(self):
    """writes a turtle representation of the vocabulary to
    the current directory as <name>.ttl.
    """
    with open(self.name+".ttl", "w", encoding="utf-8") as f:
        # turn all metadata into ready-made TTL literals
        meta_items = dict((k, make_ttl_literal(v))
            for k, v in self.get_meta_dict().items())
        # authors is a ;-separated list; each one becomes a foaf blank node
        meta_items["creators"] = ",\n ".join(
            '[ foaf:name {} ]'.format(make_ttl_literal(n.strip()))
            for n in self.authors.split(";"))
        f.write(TTL_HEADER_TEMPLATE.format(**meta_items))
        # topconcepts is a space-separated list of term names (SKOS only)
        for top_concept in self.topconcepts.split():
            f.write(f"<> skos:hasTopConcept <#{top_concept}>.\n")
        # terms are written sorted by name for reproducible output
        for _, term in sorted(self.terms.items()):
            f.write(term.as_ttl())
            f.write("\n\n")
def write_rdfx(self):
    """writes an RDF/XML representation of the current vocabulary
    to the current directory as <name>.rdf

    Since we never actually deal with proper RDF triples in here (so
    far), we create the RDF/XML as an export of our turtle code.  Perhaps
    that's even for the better.

    This requires write_turtle to have run first, as it re-parses
    <name>.ttl from the current directory.
    """
    triples = rdflib.Graph()
    with open(self.name+".ttl", "r", encoding="utf-8") as f:
        triples.parse(file=f, format="turtle")
    with open(self.name+".rdf", "wb") as f:
        triples.serialize(f, "xml")
def write_desise(self):
    """writes a dead simple semantics json into the current directory
    as <name>.desise.
    """
    # to_desise_dict is a module-level function defined further down
    with open(self.name+".desise", "w", encoding="utf-8") as f:
        json.dump(to_desise_dict(self), f, indent=" ")
def get_html_body(self):
    """returns HTML DOM material for the terms in this vocabulary.

    This is one table with a header row and one row per term
    (see Term.as_html); terms are sorted by name.
    """
    return T.table(class_="terms")[
        T.thead[
            T.tr[
                T.th(title="The formal name of the term as used in URIs"
                    )["Term"],
                T.th(title="Suggested label for the predicate"
                    " in human-facing UIs")["Label"],
                T.th(title="Human-readable description of the predicate"
                    )["Description"],
                T.th(title="If the predicate is in a wider-narrower relationship"
                    " to other predicates: The more general term.")["Parent"],
                T.th(title="Further properties of this term.")[
                    "More"],
            ],
        ],
        T.tbody[
            [t.as_html() for _, t in sorted(self.terms.items())]
        ]]
def write_html(self):
"""writes an HTML representation of this vocabulary to the
current directory as <name>.html.
Override the get_html_body method to change this method's
behaviour; what's in here is just the source format-independent
material.
"""
# licensehtml is an HTML literal; parse it first so the elements
# don't get escaped
license_element = etree.fromstring(
'<p id="license">'+self.licensehtml+'</p>')
doc = T.html(xmlns="http://www.w3.org/1999/xhtml")[
T.head[
T.title["IVOA Vocabulary: "+self.title],
T.meta(http_equiv="content-type",
content="text/html;charset=utf-8"),
T.script(type="text/javascript") [JAVASCRIPT],
T.style(type="text/css")[
CSS_STYLE],],
T.body[
T.h1["IVOA Vocabulary: "+self.title],
T.div(class_="intro")[
T.p["This is the description of the vocabulary ",
T.code[self.baseuri],
" as of {}.".format(self.timestamp)],
T.p(class_="draftwarning")["This vocabulary is not"
" yet approved by the IVOA. This means that"
" terms can still disappear without prior notice."]