--- a/primer/WD-prov-primer-20120724/Primer.html Mon Jul 16 12:02:48 2012 +0200
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,1248 +0,0 @@
-<!DOCTYPE html>
-<html>
- <head>
- <title>PROV Model Primer</title>
- <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
- <!--
- === NOTA BENE ===
- For the three scripts below, if your spec resides on dev.w3 you can check them
- out in the same tree and use relative links so that they'll work offline,
- -->
- <!-- PM -->
- <style type="text/css">
- .note { font-size:small; margin-left:50px }
- </style>
-
- <script src="http://www.w3.org/2007/OWL/toggles.js" class="remove"></script>
- <script src="http://dev.w3.org/2009/dap/ReSpec.js/js/respec.js" class="remove"></script>
-
- <script class="remove">
- var addExtraReferences = function() { // hook listed in respecConfig.preProcess; merges the local citations into the bibliography table
- for (var k in extraReferences) // visit every key of the extraReferences table declared just below
- berjon.biblio[k] = extraReferences[k]; // add (or overwrite) that key in berjon.biblio; berjon is not defined in this file - presumably supplied by the ReSpec script loaded above
- };
- var extraReferences = {
- "PROV-DM":
- "Luc Moreau, Paolo Missier. "+
- "<a href=\"http://www.w3.org/TR/prov-dm/\"><cite>The PROV Data Model and Abstract Syntax Notation</cite></a>. "+
- "Working Draft, "+
- "URL: <a href=\"http://www.w3.org/TR/prov-dm/\">http://www.w3.org/TR/prov-dm/</a>",
-
- "PROV-O":
- "Satya Sahoo, Deborah McGuinness. "+
- "<a href=\"http://www.w3.org/TR/prov-o/\"><cite>The PROV Ontology: Model and Formal Semantics</cite></a>. "+
- "Working Draft, "+
- "URL: <a href=\"http://www.w3.org/TR/prov-o/\">http://www.w3.org/TR/prov-o/</a>",
-
- "PROV-N":
- "Luc Moreau, Paolo Missier. "+
- "<a href=\"http://www.w3.org/TR/prov-n/\"><cite>PROV-N: The PROV Notation</cite></a>. "+
- "Working Draft, "+
- "URL: <a href=\"http://www.w3.org/TR/prov-n/\">http://www.w3.org/TR/prov-n/</a>",
-
- "PROV-AQ":
- "Graham Klyne, Paul Groth. "+
- "<a href=\"http://www.w3.org/TR/prov-aq/\"><cite>PROV-AQ: Provenance Access and Query</cite></a>. "+
- "Working Draft, "+
- "URL: <a href=\"http://www.w3.org/TR/prov-aq/\">http://www.w3.org/TR/prov-aq/</a>",
-
- "PROV-CONSTRAINTS":
- "James Cheney, Paolo Missier, Luc Moreau. "+
- "<a href=\"http://www.w3.org/TR/prov-constraints/\"><cite>Constraints of the Provenance Data Model</cite></a>. "+
- "Working Draft, "+
- "URL: <a href=\"http://www.w3.org/TR/prov-constraints/\">http://www.w3.org/TR/prov-constraints/</a>",
-
- "TURTLE":
- "Eric Prud'hommeaux, Gavin Carothers. "+
- "<a href=\"http://www.w3.org/TR/2011/WD-turtle-20110809/\"><cite>Turtle: Terse RDF Triple Language</cite></a>. "+
- "9 August 2011. W3C Working Draft. "+
- "URL: <a href=\"http://www.w3.org/TR/2011/WD-turtle-20110809/\">http://www.w3.org/TR/2011/WD-turtle-20110809/</a>",
-
- "PROVENANCE-XG":
- "Yolanda Gil, James Cheney, Paul Groth, Olaf Hartig, Simon Miles, Luc Moreau, Paulo Pinheiro da Silva. "+
- "<a href=\"http://www.w3.org/2005/Incubator/prov/XGR-prov/\"><cite>Provenance XG Final Report</cite></a>. "+
- "8 December 2010. "+
- "URL: <a href=\"http://www.w3.org/2005/Incubator/prov/XGR-prov/\">http://www.w3.org/2005/Incubator/prov/XGR-prov/</a>"
- };
-
- var respecConfig = {
- // specification status (e.g. WD, LCWD, NOTE, etc.). If in doubt use ED.
- //specStatus: "FPWD-NOTE",
- specStatus: "WD",
-
- // the specification's short name, as in http://www.w3.org/TR/short-name/
- shortName: "prov-primer",
-
- // if your specification has a subtitle that goes below the main
- // formal title, define it here
- subtitle : "",
-
- // if you wish the publication date to be other than today, set this
- publishDate: "2012-07-24",
-
- // if the specification's copyright date is a range of years, specify
- // the start date here:
- // copyrightStart: "2005"
-
- // if there is a previously published draft, uncomment this and set its YYYY-MM-DD date
- // and its maturity status
- previousPublishDate: "2012-05-03",
- previousMaturity: "WD",
-
- // if there is a publicly available Editor's Draft, this is the link
- edDraftURI: "http://dvcs.w3.org/hg/prov/raw-file/default/primer/Primer.html",
-
- // if this is a LCWD, uncomment and set the end of its review period
- // lcEnd: "2009-08-05",
-
- // if you want to have extra CSS, append them to this list
- // it is recommended that the respec.css stylesheet be kept
- extraCSS: ["http://dev.w3.org/2009/dap/ReSpec.js/css/respec.css"],
-
- // editors, add as many as you like
- // only "name" is required
- editors: [
- { name: "Yolanda Gil", url: "http://www.isi.edu/~gil/",
- company: "Information Sciences Institute, University of Southern California, US" },
- { name: "Simon Miles", url: "http://www.inf.kcl.ac.uk/staff/simonm",
- company: "King's College London, UK" },
- ],
-
- // authors, add as many as you like.
- // This is optional, uncomment if you have authors as well as editors.
- // only "name" is required. Same format as editors.
-
- authors: [
- { name: "<a href=\"http://semanticweb.org/wiki/Khalid_Belhajjame\">Khalid Belhajjame</a>",
- company: "University of Manchester" },
- { name: "Helena Deus",
- company: "Digital Enterprise Research Institute (DERI), NUI Galway" },
- { name: "<a href=\"http://www.oeg-upm.net/index.php/en/phdstudents/28-dgarijo\">Daniel Garijo</a>",
- company: "Universidad Politécnica de Madrid" },
- { name: "Graham Klyne",
- company: "University of Oxford" },
- { name: "<a href=\"http://www.cs.ncl.ac.uk/people/Paolo.Missier\">Paolo Missier</a>",
- company: "Newcastle University" },
- { name: "<a href=\"http://soiland-reyes.com/stian/\">Stian Soiland-Reyes</a>",
- company: "University of Manchester" },
- { name: "<a href=\"http://tw.rpi.edu/web/person/StephanZednik\">Stephan Zednik</a>",
- company: "Rensselaer Polytechnic Institute" },
- ],
-
- // name of the WG
- wg: "Provenance Working Group",
-
- // URI of the public WG page
- wgURI: "http://www.w3.org/2011/prov/",
-
- // name (without the @w3.org suffix) of the public mailing list to which comments are due
- wgPublicList: "public-prov-wg",
-
- // URI of the patent status for this WG, for Rec-track documents
- // !!!! IMPORTANT !!!!
- // This is important for Rec-track documents, do not copy a patent URI from a random
- // document unless you know what you're doing. If in doubt ask your friendly neighbourhood
- // Team Contact.
- wgPatentURI: "http://www.w3.org/2004/01/pp-impl/46974/status",
-
- // Add extraReferences to bibliography database
- preProcess: [addExtraReferences]
- };
- </script>
- </head>
- <body>
- <section id="abstract">
- <p>
- This document provides an intuitive introduction and guide to the
- PROV specification for provenance on the Web. PROV is a core data model for
- provenance for building representations of the entities, people and
- processes involved in producing a piece of data or thing in the world.
- This primer explains the fundamental PROV concepts and provides examples
- of its use. The primer is intended as a starting point for those wishing
- to create or use PROV data.
- </p>
-
- <!-- p>
- This is a document for internal discussion, which will ultimately
- evolve in the first Public Working Draft of the Primer.</p -->
- </section>
-
- <section id="sotd">
- This document is part of a set of specifications aiming to define the
- various aspects that are necessary to achieve the vision of
- interoperable interchange of provenance information in heterogeneous
- environments such as the Web. This document is an
- intuitive introduction and guide with simple illustrative examples
- of the core aspects of PROV.
-
- <h4>PROV Family of Specifications</h4>
- The PROV family of specifications aims to define the various aspects that are necessary to achieve the vision of inter-operable
- interchange of provenance information in heterogeneous environments such as the Web.
- The specifications are as follows.
- <ul>
- <li> PROV-PRIMER, a primer for the PROV data model (this document),</li>
- <li> PROV-DM, the PROV data model for provenance,</li>
- <li> PROV-DM-CONSTRAINTS, a set of constraints applying to the PROV data model,</li>
- <li> PROV-N, a notation for provenance aimed at human consumption,</li>
- <li> PROV-O, the PROV ontology, an OWL-RL ontology allowing the mapping of PROV to RDF;</li>
- <li> PROV-AQ, the mechanisms for accessing and querying provenance; </li>
- <li> PROV-SEM, a formal semantics for the PROV data model.</li>
- <li> PROV-XML, an XML schema for the PROV data model.</li>
- </ul>
- <h4>How to read the PROV Family of Specifications</h4>
- <ul>
- <li>The primer is the entry point to PROV offering a pedagogical presentation of the provenance model.</li>
- <li>The Linked Data and Semantic Web community should focus on PROV-O defining PROV classes and properties specified in an OWL-RL ontology. For further details, PROV-DM and PROV-DM-CONSTRAINTS specify the constraints applicable to the data model, and its interpretation. PROV-SEM provides a mathematical semantics.</li>
- <li>The XML community should focus on PROV-XML defining an XML schema for PROV-DM. Further details can also be found in PROV-DM, PROV-DM-CONSTRAINTS, and PROV-SEM.</li>
- <li>Developers seeking to retrieve or publish provenance should focus on PROV-AQ.</li>
- <li>Readers seeking to implement other PROV serializations
- should focus on PROV-DM and PROV-DM-CONSTRAINTS. PROV-O, PROV-N, PROV-XML offer examples of mapping to RDF, text, and XML, respectively.</li>
- </ul>
-
-
- </section>
-
- <section>
- <h2>Introduction</h2>
- <p>
- This primer document provides an accessible introduction to the PROV
- specification for provenance on the Web.
- The <i>provenance</i> of digital objects represents their origins. PROV is a
- proposed specification to express provenance records,
- which contain <i>descriptions</i> of the entities
- and activities involved in producing and delivering or otherwise influencing a
- given object.
- For the remainder of this document, we use the term 'provenance' to refer also
- to records of provenance, except where the distinction is important for clarity.
- By knowing the provenance of an object, we can perceive how to
- use it. Provenance can be used for many purposes, such as
- understanding how data was collected so it can be meaningfully used, determining
- ownership and rights over an object, making judgments about information to
- determine whether to trust it, verifying that the process and steps used to obtain a
- result comply with given requirements, and reproducing how something was generated.
- </p>
-
- <p>
- As a specification for provenance, PROV accommodates all those different uses
- of provenance. Different people may have different perspectives on provenance,
- and as a result different types of information might be captured in provenance records.
- <ul>
- <li>
- One perspective might focus on <i>agent-centered provenance</i>, that is, what entities
- were involved in generating or manipulating the information in question. For example,
- in the provenance of a picture in a news article we might capture the photographer who
- took it, the person that edited it, and the newspaper that published it.
- </li>
- <li>
- A second perspective
- might focus on <i>object-centered provenance</i>, by tracing the origins of portions of a
- document to other documents. An example is having a web page that was assembled from content
- from a news article, quotes of interviews with experts, and a chart that plots data from a
- government agency.
- </li>
- <li>A third perspective one might take is on <i>process-centered provenance</i>,
- capturing the actions and steps taken to generate the information in question. For example, a
- chart may have been generated by invoking a service to retrieve data from a database, then
- extracting certain statistics from the data using some statistics package, and finally
- processing these results with a graphing tool.
- </li>
- </ul>
- </p>
-
- <p>
- Provenance records are metadata. There are other kinds of metadata that are
- not provenance. For example, the size of an image is metadata of
- that image but it is not provenance.
- For general background on provenance, a
- comprehensive overview of requirements, use cases, prior research, and proposed
- vocabularies for provenance is available from the
- Final Report of the W3C Provenance Incubator Group [[PROVENANCE-XG]].
- That document contains three general scenarios
- that may help identify the provenance aspects of planned applications and
- help plan the design of a provenance system.
- </p>
-
- <p>
- This primer document aims to ease the adoption of the PROV specifications by providing:
- </p>
- <ul>
- <li>An intuitive explanation of how PROV models provenance. A detailed description of
- all the concepts and relations in the PROV Data Model is provided in [[PROV-DM]].</li>
- <li>A simple self-contained example that illustrates how to produce and use PROV assertions, highlighting how
- to combine PROV with other popular vocabularies such as FOAF and Dublin Core. A description
- of the formal PROV ontology (PROV-O) can be found in [[PROV-O]].</li>
- <li>Example snippets using a notation of PROV designed for human
- consumption (PROV-N). Details of this notation can be found at [[PROV-N]].</li>
- </ul>
-
- <p>There are additional reference documents for PROV that are not covered in this
- primer, including the PROV Access and Query aspects of the specification [[PROV-AQ]],
- the constraints on the PROV data model [[PROV-CONSTRAINTS]],
- a formal semantics of the PROV data model (PROV-SEM), and the PROV XML notation
- (PROV-XML). </p>
-
- </section>
-
- <section>
- <h2>Intuitive overview of PROV</h2>
-
- <p>
- This section provides an intuitive explanation of the main concepts in PROV.
- As with the rest of this document, it should be treated as a starting point for
- understanding the model. The PROV data model document [[PROV-DM]]
- provides precise definitions and constraints [[PROV-CONSTRAINTS]] to be used.
- </p>
- <p>
- The following diagram provides a high level overview of the structure of PROV records,
- limited to some key PROV concepts discussed in this document.
- The diagram is the same that appears in the [[PROV-DM]] document.
- Note that because PROV is meant to describe how things were created or delivered,
- PROV relations are named so they can be used in assertions about the past.
- </p>
-
- <div style="text-align: center;">
- <img src="OverviewDiagram.png" alt="PROV-DM overview"/>
- </div>
-
- <section>
- <h3>Entities</h3>
-
- <p>
- In PROV, physical, digital, conceptual, or other kinds of thing are called
- <i>entities</i>.
- Examples of such entities are a web page, a chart, and a spellchecker.
- Provenance records can describe the provenance of entities, and
- an entity’s provenance may refer to many other entities. For example, a document D is
- an entity whose provenance refers to other entities such as a chart inserted into D,
- and the dataset that was used to create that chart.
- Entities may be described as having different attributes and
- be described from different perspectives. For example,
- document D as stored in my file system, the second version of document D,
- and D as an evolving document,
- are three distinct entities for which we may describe provenance.
- </p>
- </section>
-
- <section>
- <h3>Activities</h3>
-
- <p>
- <i>Activities</i> are how entities come into
- existence and how their attributes change to become new entities,
- often making use of previously existing entities to achieve this.
- They are
- dynamic aspects of the world, such as actions, processes, etc.
- For example, if the second version of document D was generated
- by a translation from the first version of the document in another language,
- then this translation is an activity.
- </p>
- </section>
-
- <section>
- <h3>Usage and Generation</h3>
- <p>
- Activities <i>generate</i> new entities.
- For example, writing a document brings the document into existence, while
- revising the document brings a new version into existence.
- Activities also make <i>use</i> of entities. For example, revising a document
- to fix spelling mistakes uses the original version of the document as well
- as a list of corrections.
- Generation does not always occur at the end of an activity, and an activity may generate entities
- part-way through.
- Likewise, usage does not always occur at the beginning of an activity.
- </p>
- </section>
-
- <section>
- <h3>Agents and Responsibility</h3>
- <p>
- An <i>agent</i> takes a role in an activity such
- that the agent can be assigned some degree of <i>responsibility</i> for the activity taking
- place.
- An agent can be a person, a piece of software, an inanimate object, an organization, or
- other entities that may be ascribed responsibility.
- When an agent has some responsibility for an activity, PROV says the agent was
- <i>associated</i> with the activity, where several agents may be associated with
- an activity and vice-versa.
- Consider a chart displaying some statistics
- regarding crime rates over time in a linear regression. To represent the
- provenance of that chart, we could state that the person who created the
- chart was an agent involved in its creation, and that the software used to
- create the chart was also an agent involved in that activity.
- An agent may be <i>acting on behalf</i> of others, e.g. an employee on behalf of their
- organization, and we can express such chains of responsibility in the provenance.
- </p>
- <p>
- We can also describe that an entity is <i>attributed</i> to an agent to express
- the agent's responsibility for that entity, possibly along with other agents.
- This description can be understood as a shorthand
- for saying that the agent was responsible for the activity which generated
- the entity.
- </p>
- <p>
- One may want to describe the provenance of an agent. For example, an organization
- responsible for the creation of a report may evolve over time as the report is written as
- some members leave and others join. To make provenance assertions about an agent in PROV,
- the agent must be declared explicitly both as an agent and as an entity.
- </p>
-
- </section>
-
- <section>
- <h3>Roles</h3>
- <p>
- A <i>role</i> is a description of the function or the part that an entity
- played in an activity. Roles specify
- the relationship between an entity and an activity, i.e. how
- the activity used or generated the entity. Roles also specify how agents are
- involved in an activity, qualifying their participation in the activity or
- specifying for what aspect of it each agent was responsible.
- For example, an agent may play the role of "editor" in an activity that uses
- one entity in the role of "document to be edited" and another in the role of
- "addition to be made to the document", to generate a further entity in the role of "edited document".
- Roles are application specific, so PROV does not define any particular roles.
- </p>
- <!--p>Roles are intended as an extension point in the model; it is expected users will define and use custom role taxonomies. Role interpretation is application specific.</p -->
- </section>
-
- <section>
- <h3>Derivation and Revision</h3>
- <p>
- When one entity's existence, content, characteristics and so on are
- at least partly due to another entity, then we say that the former was
- <i>derived</i> from the latter. For example, one document may contain
- material copied from another,
- and a chart was derived from the data that it illustrates.
- </p>
- <p>
- PROV allows some common, specialized kinds of derivation to be described.
- For example, a given entity, such as a document, may go through multiple <i>revisions</i>
- over time. Between revisions,
- one or more attributes of the entity may change.
- In PROV, the result of each revision is a new entity.
- PROV allows one to relate those entities by making a description that
- one was a revision of another.
- Another specialized kind of derivation is to say that one entity, commonly
- a document, <i>quotes</i> from another.
- </p>
- </section>
-
- <section>
- <h3>Plans</h3>
- <p>
- Activities may follow pre-defined procedures, such as recipes, tutorials, instructions, or workflows.
- PROV refers to these, in general, as <i>plans</i>, and allows the description that a plan was followed, by agents,
- in executing an activity.
- </p>
- </section>
-
- <section>
- <h3>Time</h3>
- <p>
- Time is often a critical aspect of provenance.
- PROV allows the timing of significant events to be described, including
- when an entity was generated or used, or when an activity started
- and finished. For example, the model can be used to describe facts such as when a new
- version of a document was created (generation time), or when a document was
- edited (start and end of the editing activity).
- </p>
- </section>
-
- <section>
- <h3>Alternate Entities and Specialization</h3>
- <p>
- Entities are defined in a flexible way in PROV, allowing for different
- perspectives to be taken as appropriate for the application. The following
- are examples illustrating this idea.
- </p>
- <ul>
- <li>The same entity can appear with different descriptions in a provenance record
- because each appearance emphasizes different aspects of the entity, e.g.
- a book may be described by its title in one place and by its author and publication date
- in another.</li>
- <li>The same entity can evolve over time into different
- versions, e.g. a document that is repeatedly updated and has
- subsequent releases over time.</li>
- <li>The same entity can be copied
- or replicated, e.g. a document may be copied to several directories.</li>
- <li>An entity can go through different incarnations, e.g.
- a committee producing a report may have a set of members when the report
- is first released and have a different set of members when an update of
- the report is released.</li>
- </ul>
- <p>
- In all these situations,
- the more specific entities (the versions, copies, incarnations) can be said in PROV to be <i>specializations</i>
- of the more general entity (the book, the document or the committee as a general entity).
- The specific entities in each example are also <i>alternates</i> of each other, as they are specializations
- of the same general entity.
- Being aware that two entities are alternates allows those
- consuming the PROV data to know that understanding the provenance of one entity is salient
- to understanding the provenance of the other. Knowing that alternate entities are
- specializations of another allows a consumer of PROV to refer to the general entity
- with a unique identifier even though it is specified as different alternates
- throughout the provenance records.
- </p>
- </section>
-
- </section>
-
- <section>
- <h2>Examples of Key Concepts in PROV</h2>
-
- <p>
- In the following sections, we show how PROV can be used to model
- provenance in a specific example scenario. Samples of PROV data are given.
- These samples use the namespace prefixes <b>prov</b>, denoting
- terms from the PROV ontology, and <b>ex</b>, denoting terms specific to the example.
- We illustrate in these examples how PROV can be used in combination with other
- languages, such as FOAF and Dublin Core (with namespace prefixes <b>foaf</b> and
- <b>dcterms</b> respectively).
- </p>
- <p>
- The samples can be displayed in one or more of the following formats.
- </p>
- <ul>
- <li>[[PROV-O]] RDF triples, expressed using the [[TURTLE]] notation.</li>
- <li>[[PROV-N]] expressions.</li>
- </ul>
- <p>
- Select the formats to display using the buttons below. Note that if all formats
- are hidden, the worked examples may not make sense!
- </p>
- <form action="#"><p>
- <input id="hide-turtle-examples" type="button" value="Hide Turtle Examples"
- onclick="
- set_display_by_class('pre','turtle example','none');
- set_display_by_class('div','turtle example','none');
- set_display_by_id('hide-turtle-examples','none');
- set_display_by_id('show-turtle-examples','');" />
- <input id="show-turtle-examples" type="button" value="Show Turtle Examples"
- style="display:none"
- onclick="
- set_display_by_class('pre','turtle example','');
- set_display_by_class('div','turtle example','');
- set_display_by_id('hide-turtle-examples','');
- set_display_by_id('show-turtle-examples','none');" />
- <input id="hide-provn-examples" type="button" value="Hide PROV-N Examples"
- onclick="
- set_display_by_class('pre','provn example','none');
- set_display_by_class('div','provn example','none');
- set_display_by_id('hide-provn-examples','none');
- set_display_by_id('show-provn-examples','');" />
- <input id="show-provn-examples" type="button" value="Show PROV-N Examples"
- style="display:none"
- onclick="
- set_display_by_class('pre','provn example','');
- set_display_by_class('div','provn example','');
- set_display_by_id('hide-provn-examples','');
- set_display_by_id('show-provn-examples','none');" />
- </p>
- </form>
-
-
- <section>
- <h3>Entities</h3>
-
- <p>
- An online newspaper publishes an article with a chart about crime statistics making use of data (GovData) provided through a government portal.
- The article includes a chart based on the data, with data values composed (aggregated) by
- geographical regions.
- </p>
- <p>
- A blogger, Betty, looking at the article, spots what she thinks to be an error in the chart.
- Betty retrieves a record of the provenance of the article, describing how it was created.
- </p>
- <p>Betty finds the following descriptions of entities in the provenance.</p>
- <div class="turtle example">
- <div class="exampleheader">
- <span class="exampleheader"><b>Turtle Example</b></span></div>
- <pre>
- ex:article a prov:Entity ;
- dcterms:title "Crime rises in cities" .
- ex:dataSet1 a prov:Entity .
- ex:regionList a prov:Entity .
- ex:composition a prov:Entity .
- ex:chart1 a prov:Entity .
- </pre>
- </div>
- <div class="provn example">
- <div class="exampleheader">
- <span class="exampleheader"><b>PROV-N Example</b></span></div>
- <pre>
- entity(ex:article, [dcterms:title='Crime rises in cities'])
- entity(ex:dataSet1)
- entity(ex:regionList)
- entity(ex:composition)
- entity(ex:chart1)
- </pre>
- </div>
- <p>
- These statements, in order, refer to the article (<code>ex:article</code>),
- an original data set (<code>ex:dataSet1</code>),
- a list of regions (<code>ex:regionList</code>),
- data aggregated by region (<code>ex:composition</code>),
- and a chart (<code>ex:chart1</code>), and state that each is an entity.
- Any entity may have attributes, such as the title
- of the article, expressed using <code>dcterms:title</code> above.
- </p>
- <p>
- PROV data is commonly visualized for human consumption using particular conventions,
- which we will introduce over the following sections. To start with, entities
- are denoted using ovals, as shown below.
- </p>
- <img src="images/entities.png" alt="Visualization of the example entities"/>
- </section>
-
- <section>
- <h3>Activities</h3>
-
- <p>
- Further, the provenance describes that there was
- an activity (<code>ex:compile</code>) denoting the compilation of the
- chart from the data set.
- </p>
- <div class="turtle example">
- <div class="exampleheader">
- <span class="exampleheader"><b>Turtle Example</b></span></div>
- <pre>
- ex:compile a prov:Activity .
- </pre>
- </div>
- <div class="provn example">
- <div class="exampleheader">
- <span class="exampleheader"><b>PROV-N Example</b></span></div>
- <pre>
- activity(ex:compile)
- </pre>
- </div>
- <p>
- The provenance also includes reference to the more specific steps involved in this compilation,
- which are first composing the data by region (<code>ex:compose</code>) and then generating the
- chart graphic (<code>ex:illustrate</code>).
- </p>
- <!--p>
- Further, the provenance describes the activities involved in generating chart from the data,
- first composing the data by region (<code>ex:compose</code>) and then generating the
- chart graphic (<code>ex:illustrate</code>).
- </p -->
- <div class="turtle example">
- <div class="exampleheader">
- <span class="exampleheader"><b>Turtle Example</b></span></div>
- <pre>
- ex:compose a prov:Activity .
- ex:illustrate a prov:Activity .
- </pre>
- </div>
- <div class="provn example">
- <div class="exampleheader">
- <span class="exampleheader"><b>PROV-N Example</b></span></div>
- <pre>
- activity(ex:compose)
- activity(ex:illustrate)
- </pre>
- </div>
- <p>
- In visualizations of the PROV data, activities are depicted as rectangles, as below.
- </p>
- <img src="images/activities.png" alt="Visualization of the example activities"/>
- </section>
-
- <section>
- <h3>Usage and Generation</h3>
-
- <p>
- Concluding the basic description of what occurred, the provenance
- describes the key relations among the above
- entities and activities, i.e. the usage of an entity by an activity,
- or the generation of an entity by an activity.
- </p>
- <p>
- For example, the descriptions below state that the composition activity
- (<code>ex:compose</code>) used the original data set, that it used the list of
- regions, and that the composed data was generated by this activity.
- </p>
- <div class="turtle example">
- <div class="exampleheader">
- <span class="exampleheader"><b>Turtle Example</b></span></div>
- <pre>
- ex:compose prov:used ex:dataSet1 ;
- prov:used ex:regionList .
- ex:composition prov:wasGeneratedBy ex:compose .
- </pre>
- </div>
- <div class="provn example">
- <div class="exampleheader">
- <span class="exampleheader"><b>PROV-N Example</b></span></div>
- <pre>
- used(ex:compose, ex:dataSet1, -)
- used(ex:compose, ex:regionList, -)
- wasGeneratedBy(ex:composition, ex:compose, -)
- </pre>
- </div>
- <p>
- Similarly, the chart graphic creation activity (<code>ex:illustrate</code>)
- used the composed data, and the chart was generated by this activity.
- </p>
- <div class="turtle example">
- <div class="exampleheader">
- <span class="exampleheader"><b>Turtle Example</b></span></div>
- <pre>
- ex:illustrate prov:used ex:composition .
- ex:chart1 prov:wasGeneratedBy ex:illustrate .
- </pre>
- </div>
- <div class="provn example">
- <div class="exampleheader">
- <span class="exampleheader"><b>PROV-N Example</b></span></div>
- <pre>
- used(ex:illustrate, ex:composition, -)
- wasGeneratedBy(ex:chart1, ex:illustrate, -)
- </pre>
- </div>
- <p>
- In visualizing the PROV data, usage and generation are connections between
- entities and activities. The arrows point from the future to the past.
- </p>
- <img src="images/use-generate.png" alt="Connection of the entities and activities by use and generation links"/>
- </section>
-
- <section>
- <h3>Agents and Responsibility</h3>
-
- <p>
- Digging deeper, Betty wants to know who compiled the chart.
- Betty sees that Derek was involved in both the composition and
- chart creation activities:
- </p>
- <div class="turtle example">
- <div class="exampleheader">
- <span class="exampleheader"><b>Turtle Example</b></span></div>
- <pre>
- ex:compose prov:wasAssociatedWith ex:derek .
- ex:illustrate prov:wasAssociatedWith ex:derek .
- </pre>
- </div>
- <div class="provn example">
- <div class="exampleheader">
- <span class="exampleheader"><b>PROV-N Example</b></span></div>
- <pre>
- wasAssociatedWith(ex:compose, ex:derek, -)
- wasAssociatedWith(ex:illustrate, ex:derek, -)
- </pre>
- </div>
- <p>
- The record for Derek provides the
- following description, stating that
- Derek is an agent, specifically a person, followed by non-PROV information
- giving attributes of Derek.
- </p>
- <div class="turtle example">
- <div class="exampleheader">
- <span class="exampleheader"><b>Turtle Example</b></span></div>
- <pre>
- ex:derek a prov:Agent ;
- a prov:Person ;
- foaf:givenName "Derek"^^xsd:string ;
- foaf:mbox &lt;mailto:derek@example.org&gt; .
- </pre>
- </div>
- <div class="provn example">
- <div class="exampleheader">
- <span class="exampleheader"><b>PROV-N Example</b></span></div>
- <pre>
- agent(ex:derek,
- [prov:type='prov:Person', foaf:givenName='Derek',
- foaf:mbox='&lt;mailto:derek@example.org&gt;'])
- </pre>
- </div>
- <p>
- Derek works as part of an organization, Chart Generators Inc, and so the provenance
- declares that he acts on their behalf. Note that the organization is itself
- an agent.
- </p>
- <div class="turtle example">
- <div class="exampleheader">
- <span class="exampleheader"><b>Turtle Example</b></span></div>
- <pre>
- ex:derek prov:actedOnBehalfOf ex:chartgen .
- ex:chartgen a prov:Agent ;
- a prov:Organization ;
- foaf:name "Chart Generators Inc" .
- </pre>
- </div>
- <div class="provn example">
- <div class="exampleheader">
- <span class="exampleheader"><b>PROV-N Example</b></span></div>
- <pre>
- agent(ex:chartgen,
- [prov:type='prov:Organization',
- foaf:name = 'Chart Generators Inc'])
- actedOnBehalfOf(ex:derek, ex:chartgen, ex:compose)
- </pre>
- </div>
- <p>
- Finally, there is an explicit statement in the provenance that the chart was
- attributed to Derek.
- </p>
- <div class="turtle example">
- <div class="exampleheader">
- <span class="exampleheader"><b>Turtle Example</b></span></div>
- <pre>
- ex:chart1 prov:wasAttributedTo ex:derek .
- </pre>
- </div>
- <div class="provn example">
- <div class="exampleheader">
- <span class="exampleheader"><b>PROV-N Example</b></span></div>
- <pre>
- wasAttributedTo(ex:chart1, ex:derek)
- </pre>
- </div>
-
- <p>
- We can extend our graphical depiction to show the agents, association and attribution links.
- </p>
- <img src="images/agents.png" alt="Agents added to provenance graph and linked to entities and activities" width="95%"/>
- </section>
-
- <section>
- <h3>Roles</h3>
-
- <p>
- For Betty to understand where the error lies, she needs to have more detailed
- information on how entities have been used in and generated
- by activities. Betty has determined that <code>ex:compose</code> used
- entities <code>ex:regionList</code> and <code>ex:dataSet1</code>, but she does not
- know what function these entities played in the processing. Betty
- also knows that <code>ex:derek</code> was associated with the activities, but she does
- not know if Derek was the analyst responsible for determining how the data
- should be composed.
- </p>
- <p>
- The above information is described as roles in the provenance. The composition
- activity involved entities in four roles: the data to be composed (<code>ex:dataToCompose</code>),
- the regions to aggregate by (<code>ex:regionsToAggregateBy</code>), the
- resulting composed data (<code>ex:composedData</code>), and the
- analyst doing the composition (<code>ex:analyst</code>).
- </p>
- <div class="turtle example">
- <div class="exampleheader">
- <span class="exampleheader"><b>Turtle Example</b></span></div>
- <pre>
- ex:dataToCompose a prov:Role .
- ex:regionsToAggregateBy a prov:Role .
- ex:composedData a prov:Role .
- ex:analyst a prov:Role .
- </pre>
- <p>
- Examples in the sections above show descriptions of the simple facts that the
- composition activity used, generated and was enacted by entities/agents.
- For example, the usage of the data set by the compose activity is expressed
- as follows.
- </p>
- <div class="exampleheader">
- <span class="exampleheader"><b>Turtle Example</b></span></div>
- <pre>
- ex:compose prov:used ex:dataSet1 .
- </pre>
- <p>
- The
- provenance can contain more details of exactly how these entities and agents
- were involved in the activity.
- To express this, PROV-O refers to <i>qualified usage</i>, <i>qualified generation</i>, etc.,
- which are descriptions consisting of several statements about how usage, generation, etc. took place.
- For example, we may describe the plan followed by an agent in performing an activity, or
- the time at which an activity generated an entity, both illustrated later.
- Another example of qualified involvement is the role an entity played in an activity.
- The descriptions below state
- that the composition activity (<code>ex:compose</code>) included the usage
- of the government data set (<code>ex:dataSet1</code>) in the role of the data
- to be composed (<code>ex:dataToCompose</code>).
- </p>
- <div class="exampleheader">
- <span class="exampleheader"><b>Turtle Example</b></span></div>
- <pre>
- ex:compose prov:qualifiedUsage [
- a prov:Usage ;
- prov:entity ex:dataSet1 ;
- prov:hadRole ex:dataToCompose
- ] .
- </pre>
- </div>
- <div class="provn example">
- <p>
- In PROV-N, the role is expressed as one of the list of attributes in the used
- expression, with the attribute name <code>prov:role</code>.
- </p>
- <div class="exampleheader">
- <span class="exampleheader"><b>PROV-N Example</b></span></div>
- <pre>
- used(ex:compose, ex:dataSet1, -, [prov:role='ex:dataToCompose'])
- </pre>
- </div>
- <p>
- This can then be distinguished from the same activity's usage of the list of
- regions because the roles played are different.
- </p>
- <div class="turtle example">
- <div class="exampleheader">
- <span class="exampleheader"><b>Turtle Example</b></span></div>
- <pre>
- ex:compose prov:qualifiedUsage [
- a prov:Usage ;
- prov:entity ex:regionList ;
- prov:hadRole ex:regionsToAggregateBy
- ] .
- </pre>
- </div>
- <div class="provn example">
- <div class="exampleheader">
- <span class="exampleheader"><b>PROV-N Example</b></span></div>
- <pre>
- used(ex:compose, ex:regionList, -, [prov:role='ex:regionsToAggregateBy'])
- </pre>
- </div>
- <p>
- Similarly, the provenance includes descriptions that the same activity was
- enacted in a particular way by Derek, so it indicates that he had the role of
- <code>ex:analyst</code>, and that the entity <code>ex:composition</code> took the role of the composed
- data in what the activity generated:
- </p>
- <div class="turtle example">
- <div class="exampleheader">
- <span class="exampleheader"><b>Turtle Example</b></span></div>
- <pre>
- ex:compose prov:qualifiedAssociation [
- a prov:Association ;
- prov:agent ex:derek ;
- prov:hadRole ex:analyst
- ] .
- ex:composition prov:qualifiedGeneration [
- a prov:Generation ;
- prov:activity ex:compose ;
- prov:hadRole ex:composedData
- ] .
- </pre>
- </div>
- <div class="provn example">
- <div class="exampleheader">
- <span class="exampleheader"><b>PROV-N Example</b></span></div>
- <pre>
- wasAssociatedWith(ex:compose, ex:derek, -, [prov:role='ex:analyst'])
- wasGeneratedBy(ex:composition, ex:compose, -, [prov:role='ex:composedData'])
- </pre>
- </div>
- <p>
- Depicting the above visually, we have the following.
- </p>
- <img src="images/roles.png" alt="Provenance graph annotated with roles played by entities and agents"/>
- </section>
-
- <section>
- <h3>Derivation and Revision</h3>
-
- <p>
- After looking at the detail of the compilation activity, there appears
- to be nothing wrong, so Betty concludes the error is in the government dataset.
- She looks at the dataset <code>ex:dataSet1</code>,
- and sees that it is missing data from one of the zipcodes in the area. She contacts
- the government agency, and a new version of GovData is created, declared to be the
- next revision of the data. The provenance of this new dataset,
- <code>ex:dataSet2</code>, states that it is a revision of the
- old data set, <code>ex:dataSet1</code>.
- </p>
- <div class="turtle example">
- <div class="exampleheader">
- <span class="exampleheader"><b>Turtle Example</b></span></div>
- <pre>
- ex:dataSet2 a prov:Entity ;
- prov:wasRevisionOf ex:dataSet1 .
- </pre>
- </div>
- <div class="provn example">
- <div class="exampleheader">
- <span class="exampleheader"><b>PROV-N Example</b></span></div>
- <pre>
- entity(ex:dataSet2)
- wasDerivedFrom(ex:dataSet2, ex:dataSet1, [prov:type='prov:Revision'])
- </pre>
- </div>
- <p>
- Derek notices that there is a new dataset available and creates a new chart based on the revised data,
- using another compilation activity. Betty checks the article again at a
- later point, and wants to know if it is based on the old or new GovData.
- She sees a new description stating that the new chart is derived from the new dataset.
- </p>
- <div class="turtle example">
- <div class="exampleheader">
- <span class="exampleheader"><b>Turtle Example</b></span></div>
- <pre>
- ex:chart2 a prov:Entity ;
- prov:wasDerivedFrom ex:dataSet2 .
- </pre>
- </div>
- <div class="provn example">
- <div class="exampleheader">
- <span class="exampleheader"><b>PROV-N Example</b></span></div>
- <pre>
- wasDerivedFrom(ex:chart2, ex:dataSet2)
- </pre>
- </div>
- <p>and that the new chart is a revision of the original one:
- </p>
- <div class="turtle example">
- <div class="exampleheader">
- <span class="exampleheader"><b>Turtle Example</b></span></div>
- <pre>
- ex:chart2 a prov:Entity ;
- prov:wasRevisionOf ex:chart1 .
- </pre>
- </div>
- <div class="provn example">
- <div class="exampleheader">
- <span class="exampleheader"><b>PROV-N Example</b></span></div>
- <pre>
- entity(ex:chart2)
- wasDerivedFrom(ex:chart2, ex:chart1, [prov:type='prov:Revision'])
- </pre>
- </div>
- <p>
- Derivation and revision are connections between entities, and so depicted
- with arrows in our visualization.
- </p>
- <img src="images/derivation.png" alt="Derivation and revision links between entities"/>
- </section>
-
- <section>
- <h3>Plans</h3>
-
- <p>
- Betty then wishes to know whether the new data set correctly addresses
- the error that existed before. The provenance of the new dataset,
- <code>ex:dataSet2</code>, describes not only who performed the corrections,
- Edith, but also what instructions she followed in doing so (in PROV terms, the plan).
- First, the correction activity (<code>ex:correct</code>), the person who corrected
- it, Edith (<code>ex:edith</code>), and the correction instructions (<code>ex:instructions</code>)
- are described.
- </p>
- <div class="turtle example">
- <div class="exampleheader">
- <span class="exampleheader"><b>Turtle Example</b></span></div>
- <pre>
- ex:correct a prov:Activity .
- ex:edith a prov:Agent, prov:Person .
- ex:instructions a prov:Plan .
- </pre>
- </div>
- <div class="provn example">
- <div class="exampleheader">
- <span class="exampleheader"><b>PROV-N Example</b></span></div>
- <pre>
- activity(ex:correct)
- agent(ex:edith, [prov:type='prov:Person'])
- entity(ex:instructions)
- </pre>
- </div>
- <div class="turtle example">
- <p>
- The connection between them is expressed in PROV-O using a qualified association giving details of
- how Edith was associated with the correction activity,
- including that she followed the above correction instructions.
- </p>
- <div class="exampleheader">
- <span class="exampleheader"><b>Turtle Example</b></span></div>
- <pre>
- ex:correct prov:qualifiedAssociation [
- a prov:Association ;
- prov:agent ex:edith ;
- prov:hadPlan ex:instructions
- ] .
- ex:dataSet2 prov:wasGeneratedBy ex:correct .
- </pre>
- </div>
- <div class="provn example">
- <p>
- In PROV-N, the plan is an optional parameter to wasAssociatedWith descriptions.
- </p>
- <div class="exampleheader">
- <span class="exampleheader"><b>PROV-N Example</b></span></div>
- <pre>
- wasAssociatedWith(ex:correct, ex:edith, ex:instructions)
- wasGeneratedBy(ex:dataSet2, ex:correct, -)
- </pre>
- </div>
- <p>
- Plans are additional information about the connection from an activity to
- an agent, and so, in our visualization, connect to the link between them.
- </p>
- <img src="images/plans.png" alt="Annotation of example provenance graph with plan followed"/>
- </section>
-
- <section>
- <h3>Time</h3>
-
- <p>
- The government agency that produced GovData is concerned to know how long
- the incorrect chart was in circulation before the corrected chart was created.
- That is, they wish to compare the times at which the original and the corrected
- charts were generated. The snippet below shows that the second chart
- was generated roughly a month after the first.
- </p>
- <div class="turtle example">
- <div class="exampleheader">
- <span class="exampleheader"><b>Turtle Example</b></span></div>
- <pre>
- ex:chart1 prov:generatedAtTime "2012-03-02T10:30:00"^^xsd:dateTime .
- ex:chart2 prov:generatedAtTime "2012-04-01T15:21:00"^^xsd:dateTime .
- </pre>
- </div>
- <div class="provn example">
- <div class="exampleheader">
- <span class="exampleheader"><b>PROV-N Example</b></span></div>
- <pre>
- wasGeneratedBy(ex:chart1, ex:compile, 2012-03-02T10:30:00)
- wasGeneratedBy(ex:chart2, ex:compile2, 2012-04-01T15:21:00)
- </pre>
- </div>
-
- <p>
- To ensure their procedures are efficient, the agency also wishes to know how long the
- corrections took once the error was discovered. That is, they wish to know the
- start and end times of the correction activity (<code>ex:correct</code>).
- These details are expressed as follows, showing that the corrections took a
- little over a day.
- </p>
- <div class="turtle example">
- <div class="exampleheader">
- <span class="exampleheader"><b>Turtle Example</b></span></div>
- <pre>
- ex:correct prov:startedAtTime "2012-03-31T09:21:00"^^xsd:dateTime ;
- prov:endedAtTime "2012-04-01T15:21:00"^^xsd:dateTime .
- </pre>
- </div>
- <div class="provn example">
- <div class="exampleheader">
- <span class="exampleheader"><b>PROV-N Example</b></span></div>
- <pre>
- activity(ex:correct, 2012-03-31T09:21:00, 2012-04-01T15:21:00)
- </pre>
- </div>
- <p>
- Time is visualized as additional information regarding activities or the
- links between activities and entities or agents.
- </p>
- <img src="images/time.png" alt="Annotation of provenance graph with example timestamps" width="95%"/>
- </section>
-
- <section>
- <h3>Alternate Entities and Specialization</h3>
-
- <p>
- Before noticing anything wrong with the government data, Betty had already
- posted a blog entry about the article. The blog entry had its own published
- provenance, stating that it quoted some text from the article.
- </p>
- <div class="turtle example">
- <div class="exampleheader">
- <span class="exampleheader"><b>Turtle Example</b></span></div>
- <pre>
- ex:blogEntry a prov:Entity ;
- prov:wasQuotedFrom ex:article .
- </pre>
- </div>
- <div class="provn example">
- <div class="exampleheader">
- <span class="exampleheader"><b>PROV-N Example</b></span></div>
- <pre>
- entity(ex:blogEntry)
- wasDerivedFrom(ex:blogEntry, ex:article, [prov:type='prov:Quotation'])
- </pre>
- </div>
- <p>
- The newspaper, from past experience, anticipated that there could be revisions
- to the article, and so created identifiers for both the article in general
- (<code>ex:article</code>), a URI that got redirected to the first version of the article, and that first version itself (<code>ex:articleV1</code>),
- allowing both to be referred to as entities in provenance data.
- In the provenance records, the newspaper describes the connection between the two: that
- the first version of the article is a specialization of the article in general.
- </p>
- <div class="turtle example">
- <div class="exampleheader">
- <span class="exampleheader"><b>Turtle Example</b></span></div>
- <pre>
- ex:articleV1 a prov:Entity ;
- prov:specializationOf ex:article .
- </pre>
- </div>
- <div class="provn example">
- <div class="exampleheader">
- <span class="exampleheader"><b>PROV-N Example</b></span></div>
- <pre>
- entity(ex:articleV1)
- specializationOf(ex:articleV1, ex:article)
- </pre>
- </div>
- <p>
- Later, after the data set is corrected and the new chart generated, a new version
- of the article, <code>ex:articleV2</code>, is created with its own URI, to which the article
- is redirected. To ensure that those
- consulting the provenance of <code>ex:articleV2</code> understand that it
- is connected with the provenance of <code>ex:article</code> and <code>ex:articleV1</code>,
- the newspaper describes how these entities are related.
- </p>
- <div class="turtle example">
- <div class="exampleheader">
- <span class="exampleheader"><b>Turtle Example</b></span></div>
- <pre>
- ex:articleV2 prov:specializationOf ex:article .
- ex:articleV2 prov:alternateOf ex:articleV1 .
- </pre>
- </div>
- <div class="provn example">
- <div class="exampleheader">
- <span class="exampleheader"><b>PROV-N Example</b></span></div>
- <pre>
- specializationOf(ex:articleV2, ex:article)
- alternateOf(ex:articleV2, ex:articleV1)
- </pre>
- </div>
- <p>
- Note that above we could have also
- stated that <code>ex:articleV2</code> was a revision of <code>ex:articleV1</code>,
- as we did between <code>ex:chart2</code> and <code>ex:chart1</code>,
- which would describe more concretely how the alternate entities are related.
- Specialization and alternate relations connect entities, and so are visualized
- as links between them.
- </p>
- <img src="images/specialization.png" alt="Specialization and alternate links between entities"/>
- </section>
-
- <section>
- <h3>Complete PROV data</h3>
- <p>
- We visualize the whole example below.
- </p>
- <img src="images/everything.png" alt="Provenance graph for whole example" width="95%"/>
- </section>
- </section>
-
- <section class="appendix">
- <h2>Acknowledgements</h2>
- <p>
- The Provenance Working Group members.
- </p>
- </section>
-
- <section class="appendix">
- <h2>Changes Since Second Public Working Draft</h2>
- <ul>
- <li>Added selection between Turtle and PROV-N examples with supporting text.</li>
- <li>Removed PROV-N appendix.</li>
- <li>Removed reference to accounts, using bundles instead.</li>
- <li>Scaled the width of wide figures to aid printing.</li>
- <li>Used generatedAtTime rather than qualified PROV-O.</li>
- <li>Many clarifications and improvements throughout the text.</li>
- </ul>
- </section>
-
-</body></html>