Version 1.1 -> 1.2 #8

Open · wants to merge 2 commits into base: main
6 changes: 6 additions & 0 deletions CHANGELOG.md
@@ -18,3 +18,9 @@

### Added
- Changelog file


## [1.2] - 2021-06-05
### Fixed
- Fixed error messages to flag ordered lists (olo) as an UnsupportedFeature
- Fixed validator to flag unknown properties as errors
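Reviewer note on the two changelog entries above: roughly the kind of input the 1.2 validator is meant to reject. The names below (`kb:`, `uco-core:Annotation`, `uco-observable:File`, `notARealProperty`) are illustrative placeholders, not taken from the repo's test data; this is a sketch of the expected behaviour, not an actual test.

```python
import json

# Hypothetical JSON-LD-style fragments illustrating the two new 1.2 checks.

uses_ordered_list = {
    "@id": "kb:annotation-1",
    "@type": "uco-core:Annotation",
    # olo:slot should now be reported as UnsupportedFeature
    # ("ordered list (olo) not supported").
    "olo:slot": [{"olo:index": 1, "olo:item": {"@id": "kb:item-1"}}],
}

unknown_property = {
    "@id": "kb:file-1",
    "@type": "uco-observable:File",
    # A property with no range in the ontology should now be reported
    # as a ConstraintError ("unknown property").
    "uco-observable:notARealProperty": "value",
}

print(json.dumps(uses_ordered_list, indent=2))
print(json.dumps(unknown_property, indent=2))
```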
2 changes: 1 addition & 1 deletion README.md
@@ -1,6 +1,6 @@
# UCO-Utility-Pre-0.7.0-Validator

## Description - (Beta Release Version 1.1)
## Description - (Beta Release Version 1.2)

The UCO/CASE Validation Toolkit provides the capability to validate JSON-LD data files against a turtle-file based ontology such as the Unified Cyber Ontology (UCO) and Cyber-Investigation Analysis Standard Expression (CASE).

2 changes: 1 addition & 1 deletion src/precondition.py
@@ -134,7 +134,7 @@ def autogenerate_empty_prefix(text, prefix_length, alphabet):

# If we found all possible strings (this is really unlikely),
# we probably need to increase the string length
if len(non_candidate_prefix_strings) == len(alphabet):
if len(non_candidate_prefix_strings) == len(alphabet)**prefix_length:
raise Exception('Could not find unused prefix sequence!')

# Look for a prefix string that is not is the non_candidate set
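Reviewer note on the fix above: with an alphabet of N symbols and prefixes of length `prefix_length`, there are N**prefix_length possible prefix strings, not N, so the exhaustion guard has to compare against the exponentiated count. A standalone sketch of the arithmetic (hypothetical helper name, not the repo's code):

```python
from itertools import product

def count_possible_prefixes(alphabet, prefix_length):
    # Every string of length `prefix_length` over `alphabet` is a candidate,
    # so the total is len(alphabet) ** prefix_length.
    return sum(1 for _ in product(alphabet, repeat=prefix_length))

alphabet = 'abc'
print(count_possible_prefixes(alphabet, 2))  # 9
print(len(alphabet) ** 2)                    # 9  -- the new, correct bound
print(len(alphabet))                         # 3  -- the old bound, exhausted far too early
```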
46 changes: 41 additions & 5 deletions src/validator.py
@@ -8,14 +8,16 @@
'''
import pprint # For debug
import rdflib
from rdflib.namespace import RDF, XSD
from rdflib.namespace import RDF, RDFS, XSD
from triples import get_spo_dict
from class_constraints import ClassConstraints
from message import DataError, UnsupportedFeature, ConstraintError
from datatype_constraints import DatatypeConstraints
from xsd_validator import validate_xsd
from context import Context

OLO=rdflib.Namespace('http://purl.org/ontology/olo/core#')


def validate(ontology, case_data):
'''
@@ -37,10 +39,10 @@ def validate(ontology, case_data):
ontology.constraints, ontology.property_ranges, ontology.ancestor_classes, context)

# Return error messages sorted by line number
error_messages = list(set(error_messages)) # keep unique messages only
error_messages.sort(key=lambda x: x.line_number if x.line_number else 0)
return error_messages


def validate_case_data(spo_dict, line_numbers, ontology_constraints, ontology_property_ranges,
ontology_ancestor_classes, context):
'''
@@ -68,6 +70,13 @@ def validate_case_data(spo_dict, line_numbers, ontology_constraints, ontology_pr
# Make sure there's exactly one type. If it isn't, skip this Subject
subject_type_uris = po_dict.get(RDF.type)

# If there are any olo properties, unsupported feature
if has_olo_property(po_dict):
error_messages.append(UnsupportedFeature(
message='ordered list (olo) not supported',
line_number=line_number))
continue

# If subject has no type, error
if not subject_type_uris:
subject_description = subject.__class__.__name__.split('.')[-1]
@@ -160,7 +169,7 @@ def validate_case_data(spo_dict, line_numbers, ontology_constraints, ontology_pr
errmsg.property_uri = property_uri
errmsg.onto_class_uri = subject_type_uri
error_messages.extend(errmsgs)
# #print('Validating literals for {} got {} error messages'.format(subject, len(errmsgs)))
#print('Validating literals for {} got {} error messages'.format(subject, len(errmsgs)))

# Done! Return error messages
return error_messages
@@ -230,10 +239,14 @@ def validate_range_constraints(pvt_dict, ontology_property_ranges, ancestor_clas
# Check property ranges for each property
for property_uri, vt_dict in pvt_dict.items():

# Skip metadata properties
if property_uri in (RDFS.comment, RDFS.label):
continue

# Identify property's range
# If there's a class_constraint and it has a range, use that range.
# Otherwise if there's a "global" property range, use that range.
# Otherwise, there's no range, do there's nothing to check
# Otherwise, there's no range, this is an unknown property so reject it
property_range = None
if class_constraints:
property_constraints = class_constraints.get_property_constraints(property_uri)
@@ -243,11 +256,17 @@ def validate_range_constraints(pvt_dict, ontology_property_ranges, ancestor_clas
if not property_range:
property_range = ontology_property_ranges.get(property_uri) # could still be None

if not property_range: # unknown property
error_messages.append(ConstraintError(
message='unknown property',
property_uri=property_uri))


# If there's a property range, check that the property range is the same as or an ancestor of the value type
if property_range:
for value, value_type in vt_dict.items():
if not (property_range == value_type or property_range in ancestor_classes.get(value_type, [])):
#import pdb; pdb.set_trace()
error_messages.append(ConstraintError(
message="property's value {} is a {} but must be a {}".format(
'' if isinstance(value, rdflib.term.BNode) else value,
@@ -291,7 +310,7 @@ def validate_literal(literal, constraints, context):
message='Literal {} has datatype that is not a URIRef: {}'.format(literal, literal.datatype))]

# If Literal datatype is an XSD type, validate it and return list of error messages
if str(literal.datatype).startswith(XSD) or str(literal.datatype).startswith('xsd:') or str(literal.datatype).startswith('xs:'):
if str(literal.datatype).startswith(str(XSD)) or str(literal.datatype).startswith('xsd:') or str(literal.datatype).startswith('xs:'):
return validate_xsd(str(literal), literal.datatype)

# If we're here, Literal datatype is a URIRef and not an XSD type.
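Reviewer note on the `str(XSD)` change above: this appears to accommodate newer rdflib releases, where XSD is a DefinedNamespace class rather than a plain str subclass, so it has to be converted before being handed to `str.startswith` (an assumption about the motivation; the PR does not say). A minimal check:

```python
from rdflib.namespace import XSD

datatype = 'http://www.w3.org/2001/XMLSchema#integer'
# str(XSD) is the namespace IRI 'http://www.w3.org/2001/XMLSchema#' in both
# old and new rdflib, while passing XSD itself to startswith() can raise
# TypeError on rdflib >= 6.
print(datatype.startswith(str(XSD)))  # True
```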
@@ -378,6 +397,11 @@ def get_value_type(value, spo_dict, context):
errmsg = DataError(message='missing link <{}>'.format(link))
return None, [errmsg]

# If it's a BNode and it has any olo properties, unsupported feature
if has_olo_property(po_dict):
errmsg = UnsupportedFeature(message='ordered lists (olo) not supported')
return None, [errmsg]

# Get the BNode or URIRef's types
datatypes = po_dict.get(RDF.type) # SET of classes (URIRefs), should have only one member

@@ -400,3 +424,15 @@ def get_value_type(value, spo_dict, context):
message='unrecognized data value {} of type {}, expected a Literal, URIRef or BNode'.format(
value, type(value)))
return None, [errmsg]


def has_olo_property(po_dict):
'''
Arguments:
po_dict dictionary {property:object}

Return:
True if (at least) one of the properties is an ordered list (olo)
False if none of the properties are ordered list (olo)
'''
return any([OLO in prop for prop in po_dict.keys()])
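
For reference, a standalone usage sketch of the new helper; the sample `po_dict` is hypothetical. It leans on `rdflib.Namespace` and `URIRef` both being str subclasses, so `OLO in prop` reduces to a substring test against the olo namespace IRI:

```python
import rdflib
from rdflib.namespace import RDF

OLO = rdflib.Namespace('http://purl.org/ontology/olo/core#')

# A made-up property/object dictionary for one subject.
po_dict = {
    RDF.type: {rdflib.URIRef('http://example.org/ontology/SomeClass')},
    OLO['slot']: {rdflib.BNode()},
}

print(any(OLO in prop for prop in po_dict))  # True: olo:slot is present
print(OLO in RDF.type)                       # False: rdf:type is not an olo term
```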