From aca43e3a3d432ac8268c43ad801ca2d45ec8eb7b Mon Sep 17 00:00:00 2001 From: "Dr. Stanley A Bernsteen" Date: Sun, 6 Jun 2021 14:20:05 -0400 Subject: [PATCH 1/2] Version 1.1 -> 1.2 Updated CHANGELOG.md and README.md Modified src/validator.py as follows: To flag ordered lists (olo) as UnsupportedFeature instead of random bnode errors. To flag properties not in the ontology as unknown property. Version 1.1 assumed an open model, where unknown properties are accepted as unconstrained. Version 1.2 uses a less-open model, where properties not in the ontology are rejected and properties not known to the class (but known to the ontology) are accepted as constrained. --- CHANGELOG.md | 6 ++++++ README.md | 2 +- src/validator.py | 43 +++++++++++++++++++++++++++++++++++++++---- 3 files changed, 46 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6af40e1..45308b7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,3 +18,9 @@ ### Added - Changelog file + + +## [1.2] - 2021-06-05 +### Fixed +- Fixed error messages to flag ordered lists (olo) as an UnsupportedFeature +- Fixed validator to flag unknown properties as errors diff --git a/README.md b/README.md index 3790018..6fbb53c 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # UCO-Utility-Pre-0.7.0-Validator -## Description - (Beta Release Version 1.1) +## Description - (Beta Release Version 1.2) The UCO/CASE Validation Toolkit provides the capability to validate JSON-LD data files against a turtle-file based ontology such as the Unified Cyber Ontology (UCO) and Cyber-Investigation Analysis Standard Expression (CASE). 
diff --git a/src/validator.py b/src/validator.py index 1c66ccf..d3f995b 100644 --- a/src/validator.py +++ b/src/validator.py @@ -8,7 +8,7 @@ ''' import pprint # For debug import rdflib -from rdflib.namespace import RDF, XSD +from rdflib.namespace import RDF, RDFS, XSD from triples import get_spo_dict from class_constraints import ClassConstraints from message import DataError, UnsupportedFeature, ConstraintError @@ -16,6 +16,8 @@ from xsd_validator import validate_xsd from context import Context +OLO=rdflib.Namespace('http://purl.org/ontology/olo/core#') + def validate(ontology, case_data): ''' @@ -37,10 +39,10 @@ def validate(ontology, case_data): ontology.constraints, ontology.property_ranges, ontology.ancestor_classes, context) # Return error messages sorted by line number + error_messages = list(set(error_messages)) # keep unique messages only error_messages.sort(key=lambda x: x.line_number if x.line_number else 0) return error_messages - def validate_case_data(spo_dict, line_numbers, ontology_constraints, ontology_property_ranges, ontology_ancestor_classes, context): ''' @@ -68,6 +70,13 @@ def validate_case_data(spo_dict, line_numbers, ontology_constraints, ontology_pr # Make sure there's exactly one type. 
If it isn't, skip this Subject subject_type_uris = po_dict.get(RDF.type) + # If there are any olo properties, unsupported feature + if has_olo_property(po_dict): + error_messages.append(UnsupportedFeature( + message='ordered list (olo) not supported', + line_number=line_number)) + continue + # If subject has no type, error if not subject_type_uris: subject_description = subject.__class__.__name__.split('.')[-1] @@ -160,7 +169,7 @@ def validate_case_data(spo_dict, line_numbers, ontology_constraints, ontology_pr errmsg.property_uri = property_uri errmsg.onto_class_uri = subject_type_uri error_messages.extend(errmsgs) - # #print('Validating literals for {} got {} error messages'.format(subject, len(errmsgs))) + #print('Validating literals for {} got {} error messages'.format(subject, len(errmsgs))) # Done! Return error messages return error_messages @@ -230,10 +239,14 @@ def validate_range_constraints(pvt_dict, ontology_property_ranges, ancestor_clas # Check property ranges for each property for property_uri, vt_dict in pvt_dict.items(): + # Skip metadata properties + if property_uri in (RDFS.comment, RDFS.label): + continue + # Identify property's range # If there's a class_constraint and it has a range, use that range. # Otherwise if there's a "global" property range, use that range. 
- # Otherwise, there's no range, do there's nothing to check + # Otherwise, there's no range, this is an unknown property so reject it property_range = None if class_constraints: property_constraints = class_constraints.get_property_constraints(property_uri) @@ -243,6 +256,11 @@ def validate_range_constraints(pvt_dict, ontology_property_ranges, ancestor_clas if not property_range: property_range = ontology_property_ranges.get(property_uri) # could still be None + if not property_range: # unknown property + error_messages.append(ConstraintError( + message='unknown property', + property_uri=property_uri)) + # If there's a property range, check that the property range is the same as or an ancestor of the value type if property_range: @@ -378,6 +396,11 @@ def get_value_type(value, spo_dict, context): errmsg = DataError(message='missing link <{}>'.format(link)) return None, [errmsg] + # If it's a BNode and it has any olo properties, unsupported feature + if has_olo_property(po_dict): + errmsg = UnsupportedFeature(message='ordered lists (olo) not supported') + return None, [errmsg] + # Get the BNode or URIRef's types datatypes = po_dict.get(RDF.type) # SET of classes (URIRefs), should have only one member @@ -400,3 +423,15 @@ def get_value_type(value, spo_dict, context): message='unrecognized data value {} of type {}, expected a Literal, URIRef or BNode'.format( value, type(value))) return None, [errmsg] + + +def has_olo_property(po_dict): + ''' + Arguments: + po_dict dictionary {property:object} + + Return: + True if (at least) one of the properties is an ordered list (olo) + False if none of the properties are ordered list (olo) + ''' + return any([OLO in prop for prop in po_dict.keys()]) From 4f5396afd35f7078a8024638dce1fe80be1799ad Mon Sep 17 00:00:00 2001 From: "Dr. Stanley A Bernsteen" Date: Fri, 17 Sep 2021 13:29:15 -0400 Subject: [PATCH 2/2] Fixed two bugs In validator.py, replaced startswith(XSD) with startswith(str(XSD)). 
This bug has already been fixed in the UCO GitHub repository. In precondition.py, replaced len(alphabet) with len(alphabet)**prefix_length. This is a newly discovered bug. --- src/precondition.py | 2 +- src/validator.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/precondition.py b/src/precondition.py index f995c9e..897ac96 100644 --- a/src/precondition.py +++ b/src/precondition.py @@ -134,7 +134,7 @@ def autogenerate_empty_prefix(text, prefix_length, alphabet): # If we found all possible strings (this is really unlikely), # we probably need to increase the string length - if len(non_candidate_prefix_strings) == len(alphabet): + if len(non_candidate_prefix_strings) == len(alphabet)**prefix_length: raise Exception('Could not find unused prefix sequence!') # Look for a prefix string that is not is the non_candidate set diff --git a/src/validator.py b/src/validator.py index d3f995b..c233769 100644 --- a/src/validator.py +++ b/src/validator.py @@ -266,6 +266,7 @@ def validate_range_constraints(pvt_dict, ontology_property_ranges, ancestor_clas if property_range: for value, value_type in vt_dict.items(): if not (property_range == value_type or property_range in ancestor_classes.get(value_type, [])): + #import pdb; pdb.set_trace() error_messages.append(ConstraintError( message="property's value {} is a {} but must be a {}".format( '' if isinstance(value, rdflib.term.BNode) else value, @@ -309,7 +310,7 @@ def validate_literal(literal, constraints, context): message='Literal {} has datatype that is not a URIRef: {}'.format(literal, literal.datatype))] # If Literal datatype is an XSD type, validate it and return list of error messages - if str(literal.datatype).startswith(XSD) or str(literal.datatype).startswith('xsd:') or str(literal.datatype).startswith('xs:'): + if str(literal.datatype).startswith(str(XSD)) or str(literal.datatype).startswith('xsd:') or str(literal.datatype).startswith('xs:'): return validate_xsd(str(literal), literal.datatype) # If we're here, 
Literal datatype is a URIRef and not an XSD type.