Skip to content

Commit

Permalink
Merge pull request #175 from psychoinformatics-de/example-git-provision
Browse files Browse the repository at this point in the history
First attempt at a data provisioning specification
  • Loading branch information
mih authored Nov 27, 2024
2 parents 8ef7b6c + e517d0e commit edec937
Show file tree
Hide file tree
Showing 13 changed files with 163 additions and 85 deletions.
6 changes: 3 additions & 3 deletions src/datalad-dataset/unreleased.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,10 @@ prefixes:
annex-key: https://concepts.datalad.org/ns/annex-key/
bibo: http://purl.org/ontology/bibo/
CiTO: http://purl.org/spar/cito/
DCAT: http://www.w3.org/ns/dcat#
dcat: http://www.w3.org/ns/dcat#
datalad-ds: https://concepts.datalad.org/ns/dataset-uuid/
dcterms: http://purl.org/dc/terms/
DCTYPES: http://purl.org/dc/dcmitype/
dctypes: http://purl.org/dc/dcmitype/
dlco: https://concepts.datalad.org/
dldist: https://concepts.datalad.org/s/distribution/unreleased/
dpv: https://w3id.org/dpv#
Expand Down Expand Up @@ -70,7 +70,7 @@ default_prefix: dldist

emit_prefixes:
- CiTO
- DCAT
- dcat
- dldist
- licenses
- marcrel
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
{
"id": "gitsha:eb4d2457a1165519c61859152fe0e3394200d75d",
"identifier": [
{
"notation": "eb4d2457a1165519c61859152fe0e3394200d75d",
"schema_agency": "https://git-scm.com"
}
],
"meta_type": "dldist:Distribution",
"type": "https://git-scm.com/book/en/v2/Git-Internals-Git-Objects#_git_commit_objects",
"relation": [
{
"id": "https://github.com/datalad-datasets/machinelearning-books.git",
"meta_type": "dldist:DataService",
"type": "http://usefulinc.com/ns/doap#GitRepository",
"endpoint_url": "https://github.com/datalad-datasets/machinelearning-books.git"
}
],
"access_service": [
"https://github.com/datalad-datasets/machinelearning-books.git"
],
"has_part": [
{
"id": "gitsha:f776e30f386b83e13196eab6445f30d3ab54c155",
"meta_type": "dldist:Distribution",
"access_service": [
"https://github.com/datalad-datasets/machinelearning-books.git"
]
}
],
"qualified_part": [
{
"name": "README.md",
"entity": "gitsha:f776e30f386b83e13196eab6445f30d3ab54c155"
}
],
"@type": "Distribution"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# A dataset version distribution in the form of a Git commit
id: gitsha:eb4d2457a1165519c61859152fe0e3394200d75d
# abusing the documentation as a type definition URL of a Git commit
# TODO define term in `datalad-dataset` schema
type: https://git-scm.com/book/en/v2/Git-Internals-Git-Objects#_git_commit_objects
identifier:
  # a Git SHA
  notation: eb4d2457a1165519c61859152fe0e3394200d75d
  schema_agency: https://git-scm.com
# A hosted (http-accessible) Git repository is the access service for this
# commit and the associated tree.
# We could also consider all of GitHub as an access service, and declare
# `org` and `project` as two parameters. This would make sense in a larger
# setup, but here we are trying to be self-contained and minimal --
# applicable to any Git repo hosted anywhere
access_service:
  - https://github.com/datalad-datasets/machinelearning-books.git
relation:
  # we leave inline information on the hosted Git repo in a relation.
  # use id from access_service property above
  - id: https://github.com/datalad-datasets/machinelearning-books.git
    # identify as a dataservice -- not strictly needed for an implementation
    # that wants to obtain the commit, because we have the id from
    # `access_service` to match against
    meta_type: dldist:DataService
    # we use the DOAP (description of a project) term to identify this
    # dataservice as a Git repository.
    # TODO define term in `datalad-dataset` schema
    type: http://usefulinc.com/ns/doap#GitRepository
    # endpoint URL is the cloneable URL
    endpoint_url: https://github.com/datalad-datasets/machinelearning-books.git
has_part:
  # use the Git blob SHA as ID
  # TODO enable `commit:relpath` type identifier?
  - id: gitsha:f776e30f386b83e13196eab6445f30d3ab54c155
    # the blob is served by the same Git repository as the commit
    access_service:
      - https://github.com/datalad-datasets/machinelearning-books.git
# attaches the file name to the has_part entry with the matching id
qualified_part:
  name: README.md
  entity: gitsha:f776e30f386b83e13196eab6445f30d3ab54c155
72 changes: 36 additions & 36 deletions src/distribution/unreleased.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -53,9 +53,9 @@ prefixes:
ADMS: http://www.w3.org/ns/adms#
bibo: http://purl.org/ontology/bibo/
CiTO: http://purl.org/spar/cito/
DCAT: http://www.w3.org/ns/dcat#
dcat: http://www.w3.org/ns/dcat#
dcterms: http://purl.org/dc/terms/
DCTYPES: http://purl.org/dc/dcmitype/
dctypes: http://purl.org/dc/dcmitype/
dlco: https://concepts.datalad.org/
dldist: https://concepts.datalad.org/s/distribution/unreleased/
dpv: https://w3id.org/dpv#
Expand Down Expand Up @@ -93,7 +93,7 @@ default_prefix: dldist

emit_prefixes:
- CiTO
- DCAT
- dcat
- dldist
- licenses
- marcrel
Expand Down Expand Up @@ -140,10 +140,10 @@ slots:
- SHOULD be used to link to a description of a dcat:DataService that can provide access to the subject.
range: DataService
exact_mappings:
- DCAT:accessService
- dcat:accessService
related_mappings:
- DCAT:accessURL
- DCAT:landingPage
- dcat:accessURL
- dcat:landingPage

access_url:
slot_uri: dldist:access_url
Expand All @@ -153,10 +153,10 @@ slots:
comments:
- If the subject is available directly, typically through a HTTP Get request, `download_url` SHOULD be used instead.
exact_mappings:
- DCAT:accessURL
- dcat:accessURL
related_mappings:
- DCAT:downloadURL
- DCAT:landingPage
- dcat:downloadURL
- dcat:landingPage

address:
slot_uri: dldist:address
Expand Down Expand Up @@ -191,7 +191,7 @@ slots:
The size of a distribution in bytes.
range: NonNegativeInteger
exact_mappings:
- DCAT:byteSize
- dcat:byteSize

checksum:
slot_uri: dldist:checksum
Expand All @@ -208,7 +208,7 @@ slots:
Relevant contact information for the subject.
range: Agent
exact_mappings:
- DCAT:contactPoint
- dcat:contactPoint

date_modified:
slot_uri: dldist:date_modified
Expand Down Expand Up @@ -247,9 +247,9 @@ slots:
An available distribution of a resource.
range: Distribution
close_mappings:
- DCAT:distribution
- dcat:distribution
comments:
- Unlike `DCAT:distribution`, this property does not restrict its domain to
- Unlike `dcat:distribution`, this property does not restrict its domain to
a dataset.

download_url:
Expand All @@ -260,10 +260,10 @@ slots:
comments:
- SHOULD be used for the URL at which this distribution is available directly, typically through a HTTP Get request.
exact_mappings:
- DCAT:downloadURL
- dcat:downloadURL
related_mappings:
- DCAT:accessURL
- DCAT:landingPage
- dcat:accessURL
- dcat:landingPage

download_url_template:
slot_uri: dldist:download_url_template
Expand Down Expand Up @@ -294,7 +294,7 @@ slots:
including their operations, parameters etc.
range: uri
exact_mappings:
- DCAT:downloadURL
- dcat:downloadURL
related_mappings:
- dldist:endpoint_url
- dlthing:conforms_to
Expand All @@ -305,7 +305,7 @@ slots:
The root location or primary endpoint of a service (a Web-resolvable IRI).
range: uri
exact_mappings:
- DCAT:endpointURL
- dcat:endpointURL
related_mappings:
- dldist:endpoint_description
- dlthing:conforms_to
Expand Down Expand Up @@ -341,7 +341,7 @@ slots:
is_distribution_of:
slot_uri: dldist:is_distribution_of
description: >-
Inverse property of `DCAT:distribution`.
Inverse property of `dcat:distribution`.
domain: Distribution
range: Resource
inverse: distribution
Expand All @@ -360,7 +360,7 @@ slots:
A related resource of which the described resource is a version.
range: uriorcurie
exact_mappings:
- DCAT:isVersionOf
- dcat:isVersionOf

keyword:
slot_uri: dldist:keyword
Expand All @@ -379,7 +379,7 @@ slots:
to a resource, its distributions and/or additional information.
range: uri
exact_mappings:
- DCAT:landingPage
- dcat:landingPage
- foaf:page
comments:
- If the distribution(s) are accessible only through a landing page (i.e., direct download URLs are not known), then the landing page link SHOULD be duplicated as `dldist:access_url` on a distribution.
Expand All @@ -392,7 +392,7 @@ slots:
range: LicenseDocument
exact_mappings:
- dcterms:license
- DCAT:license
- dcat:license

license_text:
slot_uri: dldist:license_text
Expand All @@ -415,31 +415,31 @@ slots:
see_also:
- https://www.iana.org/assignments/media-types
exact_mappings:
- DCAT:mediaType
- dcat:mediaType

qualified_access:
slot_uri: dlco:qualified_access
description: >-
Link to a description of an `access_service` relationship with
`DCAT:DataService`.
`dcat:DataService`.
broad_mappings:
- DCAT:qualifiedRelation
- dcat:qualifiedRelation
range: QualifiedAccess

qualified_part:
slot_uri: dldist:qualified_part
description: >-
Qualifies a `hasPart` relationship with another entity.
broad_mappings:
- DCAT:qualifiedRelation
- dcat:qualifiedRelation

version:
slot_uri: dldist:version
description: >-
Version indicator (name or identifier) of a resource.
range: string
exact_mappings:
- DCAT:version
- dcat:version
- pav:version


Expand Down Expand Up @@ -530,7 +530,7 @@ classes:
inlined_as_list: true
range: DistributionPart
exact_mappings:
- DCAT:Distribution
- dcat:Distribution

Resource:
class_uri: dldist:Resource
Expand All @@ -554,7 +554,7 @@ classes:
is_version_of:
range: Resource
exact_mappings:
- DCAT:Resource
- dcat:Resource

LicenseDocument:
class_uri: dldist:LicenseDocument
Expand All @@ -578,7 +578,7 @@ classes:
An association class for attaching additional information to a
hasPart relationship.
broad_mappings:
- DCAT:Relationship
- dcat:Relationship
slots:
- name
- entity
Expand Down Expand Up @@ -643,21 +643,21 @@ classes:
comments:
- Characteristics of a particular `DataService` that do not vary across the `Distributions` that can be requested from it are considered properties (`has_property`) of the `DataService`. In contrast, information that is additionally needed to request a particular `Distribution` is considered an access request parameter (`has_parameter`). Such parameters can be declared for a `DataService`, and provided for a particular `Distribution` via a dedicated `QualifiedAccess` relation.
exact_mappings:
- DCAT:DataService
- dcat:DataService
broad_mappings:
- DCAT:Resource
- DCTYPES:Service
- dcat:Resource
- dctypes:Service
todos:
- Enable indication what kind of credentials are required for access, and where to obtain them.

QualifiedAccess:
class_uri: dldist:QualifiedAccess
description: >-
An association class for attaching additional information to an
`access_service` relationship between a `DCAT:Distribution` and
a `DCAT:DataService`.
`access_service` relationship between a `dcat:Distribution` and
a `dcat:DataService`.
related_mappings:
- DCAT:access_service
- dcat:access_service
slots:
- access_service
- has_parameter
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
},
{
"name": "url",
"type": "DCAT:endpointURL",
"type": "dcat:endpointURL",
"value": "https://dav.box.com/dav/git-annex",
"meta_type": "dlthing:Property"
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ has_property:
value: webdav
- name: url
value: https://dav.box.com/dav/git-annex
type: DCAT:endpointURL
type: dcat:endpointURL
- name: chunk
value: 10mb
- name: keyid
Expand Down
4 changes: 2 additions & 2 deletions src/linkml/ontology/common.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ slots:
range: LicenseDocument
exact_mappings:
- dcterms:license
- DCAT:license
- dcat:license

license_text:
slot_uri: dlco:license_text
Expand Down Expand Up @@ -173,7 +173,7 @@ slots:
Version indicator (name or identifier) of a resource.
range: string
exact_mappings:
- DCAT:version
- dcat:version
- pav:version

uuid:
Expand Down
Loading

0 comments on commit edec937

Please sign in to comment.