Version 1.1 -> 1.2 #8

Open · wants to merge 2 commits into base: main
6 changes: 6 additions & 0 deletions CHANGELOG.md
@@ -18,3 +18,9 @@

### Added
- Changelog file


## [1.2] - 2021-06-05
### Fixed
- Fixed error messages to flag ordered lists (olo) as an UnsupportedFeature
- Fixed validator to flag unknown properties as errors
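Reviewer note on the two changelog entries above: roughly the kind of input the 1.2 validator is meant to reject. The names below (`kb:`, `uco-core:Annotation`, `uco-observable:File`, `notARealProperty`) are illustrative placeholders, not taken from the repo's test data; this is a sketch of the expected behaviour, not an actual test.

```python
import json

# Hypothetical JSON-LD-style fragments illustrating the two new 1.2 checks.

uses_ordered_list = {
    "@id": "kb:annotation-1",
    "@type": "uco-core:Annotation",
    # olo:slot should now be reported as UnsupportedFeature
    # ("ordered list (olo) not supported").
    "olo:slot": [{"olo:index": 1, "olo:item": {"@id": "kb:item-1"}}],
}

unknown_property = {
    "@id": "kb:file-1",
    "@type": "uco-observable:File",
    # A property with no range in the ontology should now be reported
    # as a ConstraintError ("unknown property").
    "uco-observable:notARealProperty": "value",
}

print(json.dumps(uses_ordered_list, indent=2))
print(json.dumps(unknown_property, indent=2))
```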
2 changes: 1 addition & 1 deletion README.md
@@ -1,6 +1,6 @@
# UCO-Utility-Pre-0.7.0-Validator

## Description - (Beta Release Version 1.1)
## Description - (Beta Release Version 1.2)

The UCO/CASE Validation Toolkit provides the capability to validate JSON-LD data files against a turtle-file based ontology such as the Unified Cyber Ontology (UCO) and Cyber-Investigation Analysis Standard Expression (CASE).

2 changes: 1 addition & 1 deletion src/precondition.py
@@ -134,7 +134,7 @@ def autogenerate_empty_prefix(text, prefix_length, alphabet):

# If we found all possible strings (this is really unlikely),
# we probably need to increase the string length
if len(non_candidate_prefix_strings) == len(alphabet):
if len(non_candidate_prefix_strings) == len(alphabet)**prefix_length:
raise Exception('Could not find unused prefix sequence!')

# Look for a prefix string that is not is the non_candidate set
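Reviewer note on the fix above: with an alphabet of N symbols and prefixes of length `prefix_length`, there are N**prefix_length possible prefix strings, not N, so the exhaustion guard has to compare against the exponentiated count. A standalone sketch of the arithmetic (hypothetical helper name, not the repo's code):

```python
from itertools import product

def count_possible_prefixes(alphabet, prefix_length):
    # Every string of length `prefix_length` over `alphabet` is a candidate,
    # so the total is len(alphabet) ** prefix_length.
    return sum(1 for _ in product(alphabet, repeat=prefix_length))

alphabet = 'abc'
print(count_possible_prefixes(alphabet, 2))  # 9
print(len(alphabet) ** 2)                    # 9  -- the new, correct bound
print(len(alphabet))                         # 3  -- the old bound, exhausted far too early
```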
46 changes: 41 additions & 5 deletions src/validator.py
@@ -8,14 +8,16 @@
'''
import pprint # For debug
import rdflib
from rdflib.namespace import RDF, XSD
from rdflib.namespace import RDF, RDFS, XSD
from triples import get_spo_dict
from class_constraints import ClassConstraints
from message import DataError, UnsupportedFeature, ConstraintError
from datatype_constraints import DatatypeConstraints
from xsd_validator import validate_xsd
from context import Context

OLO=rdflib.Namespace('http://purl.org/ontology/olo/core#')


def validate(ontology, case_data):
'''
@@ -37,10 +39,10 @@ def validate(ontology, case_data):
ontology.constraints, ontology.property_ranges, ontology.ancestor_classes, context)

# Return error messages sorted by line number
error_messages = list(set(error_messages)) # keep unique messages only
error_messages.sort(key=lambda x: x.line_number if x.line_number else 0)
return error_messages


def validate_case_data(spo_dict, line_numbers, ontology_constraints, ontology_property_ranges,
ontology_ancestor_classes, context):
'''
@@ -68,6 +70,13 @@ def validate_case_data(spo_dict, line_numbers, ontology_constraints, ontology_pr
# Make sure there's exactly one type. If it isn't, skip this Subject
subject_type_uris = po_dict.get(RDF.type)

# If there are any olo properties, unsupported feature
if has_olo_property(po_dict):
error_messages.append(UnsupportedFeature(
message='ordered list (olo) not supported',
line_number=line_number))
continue

# If subject has no type, error
if not subject_type_uris:
subject_description = subject.__class__.__name__.split('.')[-1]
@@ -160,7 +169,7 @@ def validate_case_data(spo_dict, line_numbers, ontology_constraints, ontology_pr
errmsg.property_uri = property_uri
errmsg.onto_class_uri = subject_type_uri
error_messages.extend(errmsgs)
# #print('Validating literals for {} got {} error messages'.format(subject, len(errmsgs)))
#print('Validating literals for {} got {} error messages'.format(subject, len(errmsgs)))

# Done! Return error messages
return error_messages
@@ -230,10 +239,14 @@ def validate_range_constraints(pvt_dict, ontology_property_ranges, ancestor_clas
# Check property ranges for each property
for property_uri, vt_dict in pvt_dict.items():

# Skip metadata properties
if property_uri in (RDFS.comment, RDFS.label):
continue

# Identify property's range
# If there's a class_constraint and it has a range, use that range.
# Otherwise if there's a "global" property range, use that range.
# Otherwise, there's no range, do there's nothing to check
# Otherwise, there's no range, this is an unknown property so reject it
property_range = None
if class_constraints:
property_constraints = class_constraints.get_property_constraints(property_uri)
@@ -243,11 +256,17 @@ def validate_range_constraints(pvt_dict, ontology_property_ranges, ancestor_clas
if not property_range:
property_range = ontology_property_ranges.get(property_uri) # could still be None

if not property_range: # unknown property
error_messages.append(ConstraintError(
message='unknown property',
property_uri=property_uri))


# If there's a property range, check that the property range is the same as or an ancestor of the value type
if property_range:
for value, value_type in vt_dict.items():
if not (property_range == value_type or property_range in ancestor_classes.get(value_type, [])):
#import pdb; pdb.set_trace()
error_messages.append(ConstraintError(
message="property's value {} is a {} but must be a {}".format(
'' if isinstance(value, rdflib.term.BNode) else value,
@@ -291,7 +310,7 @@ def validate_literal(literal, constraints, context):
message='Literal {} has datatype that is not a URIRef: {}'.format(literal, literal.datatype))]

# If Literal datatype is an XSD type, validate it and return list of error messages
if str(literal.datatype).startswith(XSD) or str(literal.datatype).startswith('xsd:') or str(literal.datatype).startswith('xs:'):
if str(literal.datatype).startswith(str(XSD)) or str(literal.datatype).startswith('xsd:') or str(literal.datatype).startswith('xs:'):
return validate_xsd(str(literal), literal.datatype)

# If we're here, Literal datatype is a URIRef and not an XSD type.
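Reviewer note on the `str(XSD)` change above: this appears to accommodate newer rdflib releases, where XSD is a DefinedNamespace class rather than a plain str subclass, so it has to be converted before being handed to `str.startswith` (an assumption about the motivation; the PR does not say). A minimal check:

```python
from rdflib.namespace import XSD

datatype = 'http://www.w3.org/2001/XMLSchema#integer'
# str(XSD) is the namespace IRI 'http://www.w3.org/2001/XMLSchema#' in both
# old and new rdflib, while passing XSD itself to startswith() can raise
# TypeError on rdflib >= 6.
print(datatype.startswith(str(XSD)))  # True
```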
@@ -378,6 +397,11 @@ def get_value_type(value, spo_dict, context):
errmsg = DataError(message='missing link <{}>'.format(link))
return None, [errmsg]

# If it's a BNode and it has any olo properties, unsupported feature
if has_olo_property(po_dict):
errmsg = UnsupportedFeature(message='ordered lists (olo) not supported')
return None, [errmsg]

# Get the BNode or URIRef's types
datatypes = po_dict.get(RDF.type) # SET of classes (URIRefs), should have only one member

@@ -400,3 +424,15 @@ def get_value_type(value, spo_dict, context):
message='unrecognized data value {} of type {}, expected a Literal, URIRef or BNode'.format(
value, type(value)))
return None, [errmsg]


def has_olo_property(po_dict):
'''
Arguments:
po_dict dictionary {property:object}

Return:
True if (at least) one of the properties is an ordered list (olo)
False if none of the properties are ordered list (olo)
'''
return any([OLO in prop for prop in po_dict.keys()])
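
For reference, a standalone usage sketch of the new helper; the sample `po_dict` is hypothetical. It leans on `rdflib.Namespace` and `URIRef` both being str subclasses, so `OLO in prop` reduces to a substring test against the olo namespace IRI:

```python
import rdflib
from rdflib.namespace import RDF

OLO = rdflib.Namespace('http://purl.org/ontology/olo/core#')

# A made-up property/object dictionary for one subject.
po_dict = {
    RDF.type: {rdflib.URIRef('http://example.org/ontology/SomeClass')},
    OLO['slot']: {rdflib.BNode()},
}

print(any(OLO in prop for prop in po_dict))  # True: olo:slot is present
print(OLO in RDF.type)                       # False: rdf:type is not an olo term
```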