Example of workflow provenance and extension to PROV ontology
author Stian Soiland-Reyes <soiland-reyes@cs.manchester.ac.uk>
Mon, 19 Sep 2011 16:12:53 +0100
changeset 327 c91318f80692
parent 322 5ca5296c51c9
child 328 ac6f4b985178
Example of workflow provenance and extension to PROV ontology

See http://www.w3.org/2011/prov/wiki/WorkflowExample
ontology/examples/ontology-extensions/workflow/implementationOntology.ttl
ontology/examples/ontology-extensions/workflow/prov2.ttl
ontology/examples/ontology-extensions/workflow/workflow.ttl
ontology/examples/ontology-extensions/workflow/workflowDefinition.ttl
ontology/examples/ontology-extensions/workflow/workflowOntology.ttl
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ontology/examples/ontology-extensions/workflow/implementationOntology.ttl	Mon Sep 19 16:12:53 2011 +0100
@@ -0,0 +1,43 @@
+@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
+@prefix owl: <http://www.w3.org/2002/07/owl#> .
+@prefix cnt: <http://www.w3.org/2011/content#> .
+@prefix dcterms: <http://purl.org/dc/terms/> .
+
+# Domain-specific ontologies
+@prefix wf: <http://www.example.com/scientific-workflow#> .
+@prefix impl: <http://company.example.org/engine-implementation#> .
+
+# Note: Stian just made up this URI
+@prefix prov: <http://www.w3.org/2011/prov#> .
+# Not (yet) in the official OWL
+@prefix prov2: <http://www.w3.org/2011/prov/new#> .
+
+
+# The engine implementation's own workflow-engine class, specialising wf:WorkflowEngine.
+impl:WorkflowEngine rdfs:subClassOf wf:WorkflowEngine ; a owl:Class .
+
+impl:FileValue a owl:Class ;  # a file-backed value, keyed by (impl:file, wf:value)
+    owl:hasKey ( impl:file wf:value ) ;  # owl:hasKey is the OWL 2 vocabulary term for keys
+    rdfs:subClassOf prov:Entity, wf:Value .
+
+# Literal-valued property of an impl:FileValue (owl:DatatypeProperty is the OWL RDF term).
+impl:file a owl:DatatypeProperty ; rdfs:domain impl:FileValue .
+
+# In this engine, a workflow is one kind of wf:ProcessDefinition.
+impl:Workflow rdfs:subClassOf wf:ProcessDefinition ; a owl:Class .
+
+impl:Constant a owl:Class ;
+    rdfs:subClassOf wf:ProcessDefinition .  # wf:ProcessDefinition is the class the wf ontology declares
+impl:constant a owl:DatatypeProperty ;
+    rdfs:domain impl:Constant .  # the constant literal belongs to an impl:Constant definition
+
+# A process definition carried out by a command; wf:ProcessDefinition is the declared superclass.
+impl:Command a owl:Class ; rdfs:subClassOf wf:ProcessDefinition .
+
+# The command string of an impl:Command (owl:DatatypeProperty is the OWL RDF term).
+impl:command a owl:DatatypeProperty ; rdfs:domain impl:Command .
+
+# Links a wf:Value to the impl:FileValue it was read from; specialises prov:wasDerivedFrom.
+impl:wasReadFrom a owl:ObjectProperty ;
+    rdfs:subPropertyOf prov:wasDerivedFrom ;
+    rdfs:domain wf:Value ; rdfs:range impl:FileValue .
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ontology/examples/ontology-extensions/workflow/prov2.ttl	Mon Sep 19 16:12:53 2011 +0100
@@ -0,0 +1,85 @@
+@prefix : <http://www.w3.org/2000/01/rdf-schema#> .
+@prefix owl: <http://www.w3.org/2002/07/owl#> .
+@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
+
+# Note: Stian just made up this URI
+@prefix prov: <http://www.w3.org/2011/prov#> .
+# Not (yet) in the official OWL
+@prefix prov2: <http://www.w3.org/2011/prov/new#> .
+
+# The PROV ontology under its made-up namespace, equated with its two published locations.
+# TODO: Sort out these namespaces! ;-)
+<http://www.w3.org/2011/prov> a owl:Ontology ;
+    owl:sameAs <http://www.w3.org/PROV/ProvenanceOntology.owl> ,
+        <http://dvcs.w3.org/hg/prov/raw-file/default/ontology/ProvenanceOntology.owl> .
+    
+<>     a owl:Ontology ;  # this document: suggested additions on top of the imported PROV ontology
+        owl:versionInfo "0.3"^^xsd:string ;
+        owl:imports <http://dvcs.w3.org/hg/prov/raw-file/default/ontology/ProvenanceOntology.owl> ;
+        owl:sameAs <http://www.w3.org/2011/prov/new#> ;
+         :comment """Suggested changes to the prov ontology
+         (http://dvcs.w3.org/hg/prov/raw-file/default/ontology/ProvenanceOntology.owl)
+
+         If you wish to make comments regarding this document, please
+         send them to public-prov-wg@w3.org (subscribe, archives). All
+         feedback is welcome."""@en .
+
+    
+# Every EntityInRole has exactly one prov2:wasAssumedBy and exactly one prov2:wasAssumedIn value.
+prov2:EntityInRole a owl:Class ;
+    :comment "An entity assuming a role"@en ;
+    :subClassOf prov:Entity ,   # restrictions are superclasses, not types, of the class
+    [ a owl:Restriction ;
+      owl:onProperty prov2:wasAssumedBy ;
+      owl:cardinality "1"^^xsd:nonNegativeInteger
+    ] ,
+    [ a owl:Restriction ;
+      owl:onProperty prov2:wasAssumedIn ;
+      owl:cardinality "1"^^xsd:nonNegativeInteger ] .
+
+prov2:Recipe a owl:Class .  # range of prov2:recipe: a plan such as a script or workflow definition
+
+prov2:Role a owl:Class . # NOT an entity
+
+prov2:recipe a owl:ObjectProperty ;
+    :comment """A recipe or plan which somewhat describes what this
+    process execution was intended to execute. For instance a
+    shell script, workflow definition, cooking recipe or list of
+    instructions."""@en ;
+    :domain prov:ProcessExecution ;
+    :range prov2:Recipe .
+    
+
+# or startedAfter ?
+prov2:startedAt a owl:DatatypeProperty ;  # literal-valued, so a datatype property
+    :domain prov:ProcessExecution ;
+    :range xsd:dateTime .
+
+# or endedBefore ?
+prov2:endedAt a owl:DatatypeProperty ;  # literal-valued, so a datatype property
+    :domain prov:ProcessExecution ;
+    :range xsd:dateTime .
+    
+# or generatedAfter ?
+prov2:generatedAt a owl:DatatypeProperty ;  # literal-valued, so a datatype property
+    :domain prov2:EntityInRole ;
+    :range xsd:dateTime .
+
+# The entity behind a prov2:EntityInRole; a specialisation of prov:wasComplementOf.
+prov2:wasAssumedBy a owl:ObjectProperty ;
+    :subPropertyOf prov:wasComplementOf ;
+    :domain prov2:EntityInRole ; :range prov:Entity .
+
+# The prov2:Role taken on by a prov2:EntityInRole.
+prov2:assumedRole a owl:ObjectProperty ;
+    :range prov2:Role ; :domain prov2:EntityInRole .
+
+# The process execution in which a prov2:EntityInRole held its role.
+prov2:wasAssumedIn a owl:ObjectProperty ;
+    :domain prov2:EntityInRole ; :range prov:ProcessExecution .
+
+# TODO: Some inverse property magic to ensure that the
+# prov2:EntityInRole is in the other end of only one used() or have a
+# single wasGeneratedAt
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ontology/examples/ontology-extensions/workflow/workflow.ttl	Mon Sep 19 16:12:53 2011 +0100
@@ -0,0 +1,205 @@
+@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
+@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
+@prefix owl: <http://www.w3.org/2002/07/owl#> .
+@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
+@prefix cnt: <http://www.w3.org/2011/content#> .
+@prefix dcterms: <http://purl.org/dc/terms/> .
+@prefix foaf: <http://xmlns.com/foaf/0.1/> .
+@prefix : <#> .
+# Note: Stian just made up this URI
+@prefix prov: <http://www.w3.org/2011/prov#> .
+# Not (yet) in the official OWL
+@prefix prov2: <http://www.w3.org/2011/prov/new#> .
+
+# Domain-specific ontologies
+@prefix wf: <http://www.example.com/scientific-workflow#> .
+@prefix impl: <http://company.example.org/engine-implementation#> .
+
+# The workflow definition
+@prefix def: <http://my.example.com/workflow-definition#> .
+
+:workflowEngine a prov:Agent, impl:WorkflowEngine .  # the engine the processes below ran in
+
+# The person who launched the workflow run.
+:aUser foaf:name "Stian Soiland-Reyes" ; a foaf:Person, prov:Agent .
+
+
+# This document is the provenance container (or is that prov:Account ?).
+# The blank node records who generated it: an export process execution
+# that used :workflowRun and was controlled by :workflowEngine.
+<> a prov:ProvenanceContainer ;
+    prov:wasGeneratedBy [ a wf:ProvenanceExport, prov:ProcessExecution ;
+        prov:wasControlledBy :workflowEngine ;
+        prov:used :workflowRun
+    ] .
+
+
+:inputFile a prov:Entity, impl:FileValue ;
+    # Note that this file comes from the outside, so we don't know which
+    # process execution it :wasGeneratedBy
+    impl:file "/tmp/myinput.txt" ;
+    wf:value [
+        # Snapshot of actual value as it was read by :workflowEngine;
+        # wf:value is the property used in impl:FileValue's key
+        a cnt:ContentAsText ; cnt:characterEncoding "UTF-8" ;
+        cnt:chars "Steve"
+    ] .
+
+
+:input a wf:Value, prov:Entity ;
+
+    # impl:wasReadFrom is a sub-property of prov:wasDerivedFrom
+    impl:wasReadFrom :inputFile ;
+    prov:wasDerivedFrom :inputFile ;
+
+    prov:wasGeneratedBy [ wf:ranInWorkflowEngine :workflowEngine ] ;
+    # We chose not to capture and describe any further details about
+    # the file-reading process execution, but we know this value was
+    # derived from :inputFile by reading it, initiated by the workflow
+    # engine and selected by the user.
+
+    # The reason :input is used by the workflow below instead of
+    # :inputFile is that the content of impl:file might have changed by
+    # the time :input was used - but it is *this* value which was kept
+    # in memory and used by the workflow. We know it was read from the
+    # file before the first usage of :input, and the beginning of
+    # :workflowEngine.
+
+    wf:value [
+        cnt:chars "Steve" ;
+        cnt:characterEncoding "UTF-8" ;
+        a cnt:ContentAsText
+    ] .
+
+
+:workflowRun a prov:ProcessExecution, wf:Process ;  # the top-level run of def:workflowDef
+    prov2:recipe def:workflowDef ;
+    wf:wasDefinedBy def:workflowDef ;
+
+    prov:wasControlledBy :workflowEngine, :aUser ;
+    # or using sub-properties
+    wf:wasLaunchedBy :aUser ;
+    wf:ranInWorkflowEngine :workflowEngine ;
+    # TODO: Roles for agents?
+
+    prov:used [ a prov:Role, prov2:EntityInRole, wf:ValueAtPort ;
+        # role-as-complement-of syntax
+        # http://lists.w3.org/Archives/Public/public-prov-wg/2011Sep/0170.html
+        prov2:wasComplementOf :input ;
+        # Or should that be a specific sub-property?
+        prov2:wasAssumedBy :input ;
+
+        prov2:assumedRole def:inName ;
+        prov2:assumedRoleName "name" ;
+        wf:seenAtPort def:inName
+    ] ;
+    prov2:startedAt "2011-09-16T16:17:00"^^xsd:dateTime ;  # typed to match the xsd:dateTime range
+    prov2:endedAt "2011-09-16T16:17:05"^^xsd:dateTime .
+
+
+:constant a prov:ProcessExecution, wf:Process ;  # execution of def:procConstant within :workflowRun
+    prov:wasControlledBy :workflowRun ;
+    wf:wasSubProcessExecutionOf :workflowRun ;
+
+    prov2:recipe def:procConstant ;
+    wf:wasDefinedBy def:procConstant ;
+    # xsd:dateTime literals, matching the declared range of prov2:startedAt / prov2:endedAt
+    prov2:startedAt "2011-09-16T16:17:00"^^xsd:dateTime ;
+    prov2:endedAt "2011-09-16T16:17:01"^^xsd:dateTime .
+
+
+:hello a prov:Entity, wf:Value ;  # the constant string produced by :constant
+    prov:wasGeneratedBy :constant ;  # PROV property, not a document-local one
+    wf:value [
+        a cnt:ContentAsText ;
+        cnt:chars "Hello, "
+    ] .
+
+# need to complement :hello to describe the role
+:helloValue a prov:Entity, wf:Value, prov2:EntityInRole ;
+    prov:wasGeneratedBy :constant ;
+    prov2:wasComplementOf :hello ;
+    prov2:assumedRole def:constantValue ;  # the output that def:procConstant defines
+    wf:value [
+        a cnt:ContentAsText ;
+        cnt:chars "Hello, "
+    ] .
+
+:combine a prov:ProcessExecution, wf:Process ;  # execution of def:procCat within :workflowRun
+    prov:wasControlledBy :workflowRun ;
+    wf:wasSubProcessExecutionOf :workflowRun ;
+
+    prov2:recipe def:procCat ;
+    wf:wasDefinedBy def:procCat ;
+    # Two inputs, each wrapped in a prov2:EntityInRole naming the port it was seen at
+    prov:used [ a prov2:EntityInRole ;
+        prov2:wasComplementOf :hello ;
+        prov2:usedAt "2011-09-16T16:17:02"^^xsd:dateTime ;
+        prov2:assumedRole def:catIn1 ;
+        wf:seenAtPort def:catIn1 ;
+      ], [ a prov2:EntityInRole ;
+        prov2:wasComplementOf :input ;
+        prov2:usedAt "2011-09-16T16:17:01"^^xsd:dateTime ;
+        prov2:assumedRole def:catIn2 ;
+        wf:seenAtPort def:catIn2;
+      ] ;
+    prov2:startedAt "2011-09-16T16:17:01"^^xsd:dateTime ;
+    prov2:endedAt "2011-09-16T16:17:03"^^xsd:dateTime .
+
+:combined a prov:Entity ;  # the concatenated string generated by :combine
+    prov:wasGeneratedBy :combine ;
+    wf:value [
+        a cnt:ContentAsText ;
+        cnt:chars "Hello, Steve"
+    ] .
+
+:shasum a prov:ProcessExecution, wf:Process ;  # execution of def:procShasum within :workflowRun
+    prov:wasControlledBy :workflowRun ;
+    wf:wasSubProcessExecutionOf :workflowRun ;
+
+    prov2:recipe def:procShasum ;
+    wf:wasDefinedBy def:procShasum ;
+
+    # used :combined directly here without a role, this process only
+    # has a single input anyway
+    prov:used :combined ;
+
+    prov2:startedAt "2011-09-16T16:17:04"^^xsd:dateTime ;
+    prov2:endedAt "2011-09-16T16:17:05"^^xsd:dateTime .
+
+# The checksum value itself; the role-specific views of it follow.
+:sha1 a prov:Entity ;
+    # How was this generated..? Well, see below
+    wf:value [ a cnt:ContentAsText ;
+        cnt:chars "a33d1fb1658d4fbf017de59ab67437a3eb5ff50d" ;
+        cnt:characterEncoding "UTF-8"
+    ] .
+    
+# As the output of :shasum
+:sha1OutputFromShasum a prov:Entity, prov2:EntityInRole ;
+    prov:wasGeneratedBy :shasum ;
+    prov2:wasComplementOf :sha1 ;
+    prov2:assumedRole def:shaOut ;
+    wf:seenAtPort def:shaOut ;  # the port property used for the other values in this document
+    prov2:generatedAt "2011-09-16T16:17:05"^^xsd:dateTime ;
+    wf:value [
+        a cnt:ContentAsText ;
+        cnt:characterEncoding "UTF-8" ;
+        cnt:chars "a33d1fb1658d4fbf017de59ab67437a3eb5ff50d"
+    ] .
+
+# Or as the output of :workflowRun
+:sha1OutputFromWorkflow a prov:Entity, prov2:EntityInRole ;
+    prov:wasGeneratedBy :workflowRun ;
+    prov2:wasComplementOf :sha1 ;
+    prov2:assumedRole def:output ;
+    wf:seenAtPort def:output ;  # the port property used for the other values in this document
+    # A bit later than :sha1OutputFromShasum
+    prov2:generatedAt "2011-09-16T16:17:06"^^xsd:dateTime ;
+    wf:value [
+        a cnt:ContentAsText ;
+        cnt:characterEncoding "UTF-8" ;
+        cnt:chars "a33d1fb1658d4fbf017de59ab67437a3eb5ff50d"
+    ] .
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ontology/examples/ontology-extensions/workflow/workflowDefinition.ttl	Mon Sep 19 16:12:53 2011 +0100
@@ -0,0 +1,33 @@
+@prefix wf: <http://www.example.com/scientific-workflow#> .
+@prefix impl: <http://company.example.org/engine-implementation#> .
+@prefix def: <http://my.example.com/workflow-definition#> .
+
+
+# Workflow definition
+def:workflowDef a wf:ProcessDefinition, impl:Workflow ;
+    wf:definesSubProcess def:procShasum, def:procCat, def:procConstant ;  # the three sub-processes declared below
+    wf:definesInput def:inName ;
+    wf:definesOutput def:output .
+
+# Sub-processes
+def:procConstant a impl:Constant ;
+    wf:definesOutput def:constantValue ;
+    impl:constant "Hello, " .
+
+# Runs "sha1sum": one input, one output.
+def:procShasum a impl:Command ; impl:command "sha1sum" ;
+    wf:definesOutput def:shaOut ;
+    wf:definesInput def:shaIn .
+
+# Runs "cat": two inputs, one output.
+def:procCat a impl:Command ;
+    impl:command "cat" ;
+    wf:definesInput def:catIn1, def:catIn2 ;
+    wf:definesOutput def:catOut .
+
+# links: how the inputs and outputs declared above are wired together
+def:inName wf:linksTo def:catIn2 .
+def:constantValue wf:linksTo def:catIn1 .
+def:catOut wf:linksTo def:shaIn .
+def:shaOut wf:linksTo def:output .
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ontology/examples/ontology-extensions/workflow/workflowOntology.ttl	Mon Sep 19 16:12:53 2011 +0100
@@ -0,0 +1,94 @@
+@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
+@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
+@prefix owl: <http://www.w3.org/2002/07/owl#> .
+@prefix cnt: <http://www.w3.org/2011/content#> .
+
+# Note: Stian just made up this URI
+@prefix prov: <http://www.w3.org/2011/prov#> .
+# Not yet in the official OWL
+@prefix prov2: <http://www.w3.org/2011/prov/new#> .
+
+# Domain-specific ontologies
+@prefix wf: <http://www.example.com/scientific-workflow#> .
+
+# Core classes of the scientific-workflow vocabulary, each anchored in PROV.
+wf:WorkflowEngine a owl:Class ; rdfs:subClassOf prov:Agent .
+
+# Processes and provenance exports are both executions and agents.
+wf:Process a owl:Class ;
+    rdfs:subClassOf prov:ProcessExecution, prov:Agent .
+wf:ProvenanceExport a owl:Class ;
+    rdfs:subClassOf prov:ProcessExecution, prov:Agent .
+
+# Process definitions are recipes in the prov2 sense.
+wf:ProcessDefinition a owl:Class ; rdfs:subClassOf prov2:Recipe .
+
+# Relates a wf:Process (subject) to the wf:ProcessDefinition (object) it executes.
+wf:wasDefinedBy a owl:ObjectProperty, owl:FunctionalProperty ;
+    rdfs:subPropertyOf prov2:recipe ;
+    rdfs:domain wf:Process ; rdfs:range wf:ProcessDefinition .
+
+wf:Value a owl:Class ;
+    # For simplicity, but in real workflow systems two identical values
+    # can have different identities because they have different
+    # origin/generation
+    owl:hasKey ( wf:value ) ;  # owl:hasKey is the OWL 2 vocabulary term for keys
+    rdfs:subClassOf prov:Entity .
+
+# Points from a wf:Value (subject) to its cnt:Content (object).
+wf:value a owl:ObjectProperty, owl:FunctionalProperty ;
+    rdfs:domain wf:Value ; rdfs:range cnt:Content .
+
+
+# Ports, which wf:linksTo connects to each other; they come in two
+# flavours, inputs and outputs.
+wf:Port a owl:Class .
+wf:Input rdfs:subClassOf wf:Port ; a owl:Class .
+wf:Output rdfs:subClassOf wf:Port ; a owl:Class .
+
+# A value seen at exactly one port; also a prov2:EntityInRole.
+wf:ValueAtPort a owl:Class ;
+    rdfs:subClassOf wf:Value, prov2:EntityInRole, [
+        a owl:Restriction ;
+        owl:onProperty wf:seenAtPort ;
+        owl:cardinality "1"^^<http://www.w3.org/2001/XMLSchema#nonNegativeInteger> ] .
+
+# The port at which a wf:ValueAtPort was seen; a specialisation of prov2:assumedRole.
+wf:seenAtPort a owl:ObjectProperty ;
+    rdfs:subPropertyOf prov2:assumedRole ;
+    rdfs:domain wf:ValueAtPort ; rdfs:range wf:Port .
+
+
+wf:linksTo a owl:ObjectProperty ;
+    # Both ends are plain wf:Port: input and output are deliberately not
+    # distinguished here, to allow the duality of workflow ports
+    rdfs:range wf:Port ;
+    rdfs:domain wf:Port .
+
+# The agent that launched a process; a specialisation of prov:wasControlledBy.
+wf:wasLaunchedBy a owl:ObjectProperty ;
+    rdfs:subPropertyOf prov:wasControlledBy ;
+    rdfs:domain wf:Process ; rdfs:range prov:Agent .
+
+# The engine a process ran in; a specialisation of prov:wasControlledBy.
+wf:ranInWorkflowEngine a owl:ObjectProperty ;
+    rdfs:subPropertyOf prov:wasControlledBy ;
+    rdfs:domain wf:Process ; rdfs:range wf:WorkflowEngine .
+
+# Nests one process inside another; a specialisation of prov:wasControlledBy.
+wf:wasSubProcessExecutionOf a owl:ObjectProperty ;
+    rdfs:subPropertyOf prov:wasControlledBy ;
+    rdfs:domain wf:Process ; rdfs:range wf:Process .
+
+# A process definition may contain further process definitions (rdfs:domain, in the RDFS namespace).
+wf:definesSubProcess a owl:ObjectProperty ;
+    rdfs:domain wf:ProcessDefinition ; rdfs:range wf:ProcessDefinition .
+
+# Declares an input of a process definition: subject is the definition, object the wf:Input.
+wf:definesInput a owl:ObjectProperty ;
+    rdfs:domain wf:ProcessDefinition ; rdfs:range wf:Input .
+
+# Declares an output of a process definition: subject is the definition, object the wf:Output.
+wf:definesOutput a owl:ObjectProperty ;
+    rdfs:domain wf:ProcessDefinition ; rdfs:range wf:Output .
+