--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data-cube-ucr/data-cube-ucr-20120222/respec-config.js Thu Feb 28 00:59:24 2013 +0100
@@ -0,0 +1,96 @@
+var respecConfig = { // ReSpec settings for the "data-cube-ucr" document: status, dates, editors and WG details
+ // specification status (e.g. WD, LCWD, NOTE, etc.). If in doubt use ED.
+ specStatus: "WG-NOTE",
+ //copyrightStart: "2010",
+
+ // the specification's short name, as in http://www.w3.org/TR/short-name/
+ shortName: "data-cube-ucr",
+ //subtitle: "",
+ // if you wish the publication date to be other than today, set this
+ publishDate: "2013-02-27",
+
+ // if there is a previously published draft, uncomment this and set its YYYY-MM-DD date
+ // and its maturity status
+ //previousPublishDate: "2012-02-22",
+ //previousMaturity: "ED",
+ //previousDiffURI: "http://dvcs.w3.org/hg/gld/bp/",
+ //diffTool: "http://www.aptest.com/standards/htmldiff/htmldiff.pl",
+
+ // if there is a publicly available Editor's Draft, this is the link
+ edDraftURI: "http://dvcs.w3.org/hg/gld/raw-file/default/data-cube-ucr/data-cube-ucr-20120222/index.html",
+
+ // if this is a LCWD, uncomment and set the end of its review period
+ // lcEnd: "2009-08-05",
+
+ // if you want to have extra CSS, append them to this list
+ // it is recommended that the respec.css stylesheet be kept
+ extraCSS: [
+ "http://dev.w3.org/2009/dap/ReSpec.js/css/respec.css"
+ ],
+
+ // editors, add as many as you like
+ // only "name" is required
+ editors: [
+ { name: "Benedikt Kämpgen", url: "http://www.aifb.kit.edu/web/Benedikt_K%C3%A4mpgen/en", company: "FZI Karlsruhe", companyURL: "http://www.fzi.de/index.php/en" },
+ { name: "Richard Cyganiak", url: "http://richard.cyganiak.de/", company: "DERI, NUI Galway", companyURL: "http://www.deri.ie/" },
+ ], // NOTE(review): trailing comma after the last editor may yield an extra empty entry in legacy IE (< 9) — confirm target browsers
+
+ // authors, add as many as you like.
+ // This is optional, uncomment if you have authors as well as editors.
+ // only "name" is required. Same format as editors.
+
+ //authors: [],
+
+ // name of the WG
+ wg: "Government Linked Data Working Group",
+
+ // URI of the public WG page
+ wgURI: "http://www.w3.org/2011/gld/",
+
+ // name of the public mailing list to which comments are due
+ wgPublicList: "public-gld-comments",
+
+ // URI of the patent status for this WG, for Rec-track documents
+ // !!!! IMPORTANT !!!!
+ // This is important for Rec-track documents, do not copy a patent URI from a random
+ // document unless you know what you're doing. If in doubt ask your friendly neighbourhood
+ // Team Contact.
+ wgPatentURI: "", // NOTE(review): left empty — presumably the patent-disclosure link is then rendered with an empty href; confirm the WG's URI with the Team Contact
+ maxTocLevel: 3,
+ preProcess: [ preProc ] // preProc is not defined in this file — presumably provided by a script loaded earlier; verify
+ //alternateFormats: [ {uri: "diff-20110507.html", label: "diff to previous version"} ],
+};
+
+function updateExample(doc, content) {
+ // Drop the first "<!--" and the first "-->", escape the rest with doc._esc, then turn each ****text**** run into <span class="diff">text</span>.
+ content = content.replace(/<!--/, '');
+ content = content.replace(/-->/, '');
+ content = doc._esc(content);
+ content = content.replace(/\*\*\*\*([^*]*)\*\*\*\*/g, '<span class="diff">$1</span>') ;
+ return content ;
+}
+
+function updateDTD(doc, content) {
+ // Wrap the doc._esc-escaped text in <pre class="dtd">, then highlight "!ENTITY % name" and "!ELEMENT name" declarations with spans.
+ // NOTE(review): inside [^ \t$] the "$" is a literal dollar sign, so an element name only ends at a space, tab or "$" — confirm a line break was not intended.
+ content = '<pre class="dtd">' + doc._esc(content) + '</pre>';
+ content = content.replace(/!ENTITY % ([^ \t\r\n]*)/g, '!ENTITY <span class="entity">% $1</span>');
+ content = content.replace(/!ELEMENT ([^ \t$]*)/mg, '!ELEMENT <span class="element">$1</span>');
+ return content;
+}
+
+function updateSchema(doc, content) {
+ // Wrap the doc._esc-escaped text in <pre class="dtd">, then tag each xs:element name with a span whose id is "schema_element_" + name.
+ // NOTE(review): the pattern looks for a literal "<" and '"' after doc._esc has run; if _esc escapes those characters it can never match — verify.
+ content = '<pre class="dtd">' + doc._esc(content) + '</pre>';
+ content = content.replace(/<xs:element\s+name="([^&]*)"/g, '<xs:element name="<span class="element" id="schema_element_$1">$1</span>"') ;
+ return content;
+}
+
+function updateTTL(doc, content) {
+ // Wrap the doc._esc-escaped text in <pre class="sh_sourceCode"> and
+ // mark every "@prefix" occurrence with <span class="sh_keyword">.
+ content = '<pre class="sh_sourceCode">' + doc._esc(content) + '</pre>';
+ content = content.replace(/@prefix/g, '<span class="sh_keyword">@prefix</span>');
+ return content;
+}
--- a/data-cube-ucr/data-cube-ucr-2013-02-28/index.html Thu Feb 28 00:55:59 2013 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,1063 +0,0 @@
-<!DOCTYPE html PUBLIC '-//W3C//DTD XHTML+RDFa 1.1//EN' 'http://www.w3.org/MarkUp/DTD/xhtml-rdfa-2.dtd'>
-<html lang="en" dir="ltr">
-<head>
-<title>Use Cases and Requirements for the Data Cube Vocabulary</title>
-<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
-
-<script src="respec-ref.js"></script>
-<script src="respec-config.js"></script>
-<link rel="stylesheet" type="text/css" href="local-style.css">
-<style>/*****************************************************************
- * ReSpec 3 CSS
- * Robin Berjon - http://berjon.com/
- *****************************************************************/
-
-/* --- INLINES --- */
-em.rfc2119 {
- text-transform: lowercase;
- font-variant: small-caps;
- font-style: normal;
- color: #900;
-}
-
-h1 acronym, h2 acronym, h3 acronym, h4 acronym, h5 acronym, h6 acronym, a acronym,
-h1 abbr, h2 abbr, h3 abbr, h4 abbr, h5 abbr, h6 abbr, a abbr {
- border: none;
-}
-
-dfn {
- font-weight: bold;
-}
-
-a.internalDFN {
- color: inherit;
- border-bottom: 1px solid #99c;
- text-decoration: none;
-}
-
-a.externalDFN {
- color: inherit;
- border-bottom: 1px dotted #ccc;
- text-decoration: none;
-}
-
-a.bibref {
- text-decoration: none;
-}
-
-cite .bibref {
- font-style: normal;
-}
-
-code {
- color: #ff4500;
-}
-
-
-/* --- --- */
-ol.algorithm { counter-reset:numsection; list-style-type: none; }
-ol.algorithm li { margin: 0.5em 0; }
-ol.algorithm li:before { font-weight: bold; counter-increment: numsection; content: counters(numsection, ".") ") "; }
-
-/* --- TOC --- */
-.toc a, .tof a {
- text-decoration: none;
-}
-
-a .secno, a .figno {
- color: #000;
-}
-
-ul.tof, ol.tof {
- list-style: none outside none;
-}
-
-.caption {
- margin-top: 0.5em;
- font-style: italic;
-}
-
-/* --- TABLE --- */
-table.simple {
- border-spacing: 0;
- border-collapse: collapse;
- border-bottom: 3px solid #005a9c;
-}
-
-.simple th {
- background: #005a9c;
- color: #fff;
- padding: 3px 5px;
- text-align: left;
-}
-
-.simple th[scope="row"] {
- background: inherit;
- color: inherit;
- border-top: 1px solid #ddd;
-}
-
-.simple td {
- padding: 3px 10px;
- border-top: 1px solid #ddd;
-}
-
-.simple tr:nth-child(even) {
- background: #f0f6ff;
-}
-
-/* --- DL --- */
-.section dd > p:first-child {
- margin-top: 0;
-}
-
-.section dd > p:last-child {
- margin-bottom: 0;
-}
-
-.section dd {
- margin-bottom: 1em;
-}
-
-.section dl.attrs dd, .section dl.eldef dd {
- margin-bottom: 0;
-}
-</style><link rel="stylesheet" href="https://www.w3.org/StyleSheets/TR/W3C-ED"><!--[if lt IE 9]><script src='https://www.w3.org/2008/site/js/html5shiv.js'></script><![endif]--></head>
-<body><div class="head">
- <p>
-
- <a href="http://www.w3.org/"><img width="72" height="48" src="https://www.w3.org/Icons/w3c_home" alt="W3C"></a>
-
- </p>
- <h1 class="title" id="title">Use Cases and Requirements for the Data Cube Vocabulary</h1>
-
- <h2 id="w3c-editor-s-draft-25-february-2013"><abbr title="World Wide Web Consortium">W3C</abbr> Editor's Draft 25 February 2013</h2>
- <dl>
-
- <dt>This version:</dt>
- <dd><a href="http://dvcs.w3.org/hg/gld/raw-file/default/data-cube-ucr/index.html">http://dvcs.w3.org/hg/gld/raw-file/default/data-cube-ucr/index.html</a></dd>
- <dt>Latest published version:</dt>
- <dd><a href="http://www.w3.org/TR/data-cube-ucr/">http://www.w3.org/TR/data-cube-ucr/</a></dd>
-
-
- <dt>Latest editor's draft:</dt>
- <dd><a href="http://dvcs.w3.org/hg/gld/raw-file/default/data-cube-ucr/index.html">http://dvcs.w3.org/hg/gld/raw-file/default/data-cube-ucr/index.html</a></dd>
-
-
-
-
-
-
-
-
- <dt>Editors:</dt>
- <dd><a href="http://www.aifb.kit.edu/web/Benedikt_K%C3%A4mpgen/en">Benedikt Kämpgen</a>, <a href="http://www.fzi.de/index.php/en">FZI Karlsruhe</a></dd>
-<dd><a href="http://richard.cyganiak.de/">Richard Cyganiak</a>, <a href="http://www.deri.ie/">DERI, NUI Galway</a></dd>
-
-
- </dl>
-
-
-
-
-
- <p class="copyright">
- <a href="http://www.w3.org/Consortium/Legal/ipr-notice#Copyright">Copyright</a> ©
- 2013
-
- <a href="http://www.w3.org/"><abbr title="World Wide Web Consortium">W3C</abbr></a><sup>®</sup>
- (<a href="http://www.csail.mit.edu/"><abbr title="Massachusetts Institute of Technology">MIT</abbr></a>,
- <a href="http://www.ercim.eu/"><abbr title="European Research Consortium for Informatics and Mathematics">ERCIM</abbr></a>,
- <a href="http://www.keio.ac.jp/">Keio</a>), All Rights Reserved.
- <abbr title="World Wide Web Consortium">W3C</abbr> <a href="http://www.w3.org/Consortium/Legal/ipr-notice#Legal_Disclaimer">liability</a>,
- <a href="http://www.w3.org/Consortium/Legal/ipr-notice#W3C_Trademarks">trademark</a> and
- <a href="http://www.w3.org/Consortium/Legal/copyright-documents">document use</a> rules apply.
- </p>
-
-
- <hr>
-</div>
-
- <section id="abstract" class="introductory"><h2>Abstract</h2>
- <p>Many national, regional and local governments, as well as other
- organizations inside and outside of the public sector, create
- statistics. There is a need to publish those statistics in a
- standardized, machine-readable way on the web, so that statistics can
- be freely integrated and reused in consuming applications. This
- document is a collection of use cases for a standard vocabulary to
- publish statistics as Linked Data.</p>
- </section><section id="sotd" class="introductory"><h2>Status of This Document</h2>
-
-
-
- <p>
- <em>This section describes the status of this document at the time of its publication. Other
- documents may supersede this document. A list of current <abbr title="World Wide Web Consortium">W3C</abbr> publications and the latest revision
- of this technical report can be found in the <a href="http://www.w3.org/TR/"><abbr title="World Wide Web Consortium">W3C</abbr> technical reports
- index</a> at http://www.w3.org/TR/.</em>
- </p>
-
- <p>
- This is a working document of the <a href="http://www.w3.org/2011/gld/wiki/Data_Cube_Vocabulary">Data
- Cube Vocabulary project</a> within the <a href="http://www.w3.org/2011/gld/"><abbr title="World Wide Web Consortium">W3C</abbr> Government Linked Data
- Working Group</a>. Feedback is welcome and should be sent to the <a href="mailto:public-gld-comments@w3.org">public-gld-comments@w3.org
- mailing list</a>.
- </p>
-
- <p>
- This document was published by the <a href="http://www.w3.org/2011/gld/">Government Linked Data Working Group</a> as an Editor's Draft.
-
-
- If you wish to make comments regarding this document, please send them to
- <a href="mailto:public-gld-comments@w3.org">public-gld-comments@w3.org</a>
- (<a href="mailto:public-gld-comments-request@w3.org?subject=subscribe">subscribe</a>,
- <a href="http://lists.w3.org/Archives/Public/public-gld-comments/">archives</a>).
-
-
-
-
- All comments are welcome.
-
-
- </p><p>
- Publication as an Editor's Draft does not imply endorsement by the <abbr title="World Wide Web Consortium">W3C</abbr> Membership.
- This is a draft document and may be updated, replaced or obsoleted by other documents at
- any time. It is inappropriate to cite this document as other than work in progress.
- </p>
-
-
- <p>
-
- This document was produced by a group operating under the
- <a href="http://www.w3.org/Consortium/Patent-Policy-20040205/">5 February 2004 <abbr title="World Wide Web Consortium">W3C</abbr> Patent Policy</a>.
-
-
-
-
- <abbr title="World Wide Web Consortium">W3C</abbr> maintains a <a href="" rel="disclosure">public list of any patent disclosures</a>
-
- made in connection with the deliverables of the group; that page also includes instructions for
- disclosing a patent. An individual who has actual knowledge of a patent which the individual believes contains
- <a href="http://www.w3.org/Consortium/Patent-Policy-20040205/#def-essential">Essential Claim(s)</a> must disclose the
- information in accordance with <a href="http://www.w3.org/Consortium/Patent-Policy-20040205/#sec-Disclosure">section
- 6 of the <abbr title="World Wide Web Consortium">W3C</abbr> Patent Policy</a>.
-
-
- </p>
-
-
-
-
-</section><section id="toc"><h2 class="introductory">Table of Contents</h2><ul class="toc"><li class="tocline"><a href="#introduction" class="tocxref"><span class="secno">1. </span>Introduction</a></li><li class="tocline"><a href="#terminology" class="tocxref"><span class="secno">2. </span>Terminology</a></li><li class="tocline"><a href="#use-cases" class="tocxref"><span class="secno">3. </span>Use cases</a><ul class="toc"><li class="tocline"><a href="#publishing-statistical-data" class="tocxref"><span class="secno">3.1 </span>Publishing statistical data</a><ul class="toc"><li class="tocline"><a href="#publishing-general-statistics-in-a-machine-readable-and-application-independent-way-uc-1" class="tocxref"><span class="secno">3.1.1 </span>Publishing general statistics in a machine-readable and
- application-independent way (UC 1)</a></li><li class="tocline"><a href="#publishing-one-or-many-ms-excel-spreadsheet-files-with-statistical-data-on-the-web-uc-2" class="tocxref"><span class="secno">3.1.2 </span>Publishing one or many MS excel spreadsheet files with
- statistical data on the web (UC 2)</a></li><li class="tocline"><a href="#publishing-sdmx-as-linked-data-uc-3" class="tocxref"><span class="secno">3.1.3 </span>Publishing SDMX as Linked Data (UC 3)</a></li><li class="tocline"><a href="#publishing-sensor-data-as-statistics-uc-4" class="tocxref"><span class="secno">3.1.4 </span>Publishing sensor data as statistics (UC 4)</a></li><li class="tocline"><a href="#registering-statistical-data-in-dataset-catalogs-uc-5" class="tocxref"><span class="secno">3.1.5 </span>Registering statistical data in dataset catalogs (UC 5)</a></li><li class="tocline"><a href="#making-transparent-transformations-on-or-different-versions-of-statistical-data-uc-6" class="tocxref"><span class="secno">3.1.6 </span>Making transparent transformations on or different versions of
- statistical data (UC 6)</a></li></ul></li><li class="tocline"><a href="#consuming-published-statistical-data" class="tocxref"><span class="secno">3.2 </span>Consuming published statistical data</a><ul class="toc"><li class="tocline"><a href="#simple-chart-visualizations-of-integrated-published-statistical-datasets-uc-7" class="tocxref"><span class="secno">3.2.1 </span>Simple chart visualizations of (integrated) published
- statistical datasets (UC 7)</a></li><li class="tocline"><a href="#uploading-published-statistical-data-in-google-public-data-explorer-uc-8" class="tocxref"><span class="secno">3.2.2 </span>Uploading published statistical data in Google Public Data
- Explorer (UC 8)</a></li><li class="tocline"><a href="#allow-online-analytical-processing-on-published-datasets-of-statistical-data-uc-9" class="tocxref"><span class="secno">3.2.3 </span>Allow Online Analytical Processing on published datasets of
- statistical data (UC 9)</a></li><li class="tocline"><a href="#transforming-published-statistics-into-xbrl-uc-10" class="tocxref"><span class="secno">3.2.4 </span>Transforming published statistics into XBRL (UC 10)</a></li></ul></li></ul></li><li class="tocline"><a href="#requirements" class="tocxref"><span class="secno">4. </span>Requirements</a><ul class="toc"><li class="tocline"><a href="#publishing-requirements" class="tocxref"><span class="secno">4.1 </span>Publishing requirements</a><ul class="toc"><li class="tocline"><a href="#machine-readable-and-application-independent-representation-of-statistics" class="tocxref"><span class="secno">4.1.1 </span>Machine-readable and application-independent representation of
- statistics</a></li><li class="tocline"><a href="#representing-statistics-from-various-resource" class="tocxref"><span class="secno">4.1.2 </span>Representing statistics from various resource</a></li><li class="tocline"><a href="#communicating-exposing-statistics-on-the-web" class="tocxref"><span class="secno">4.1.3 </span>Communicating, exposing statistics on the web</a></li><li class="tocline"><a href="#coverage-of-typical-statistics-metadata" class="tocxref"><span class="secno">4.1.4 </span>Coverage of typical statistics metadata</a></li><li class="tocline"><a href="#expressing-hierarchies" class="tocxref"><span class="secno">4.1.5 </span>Expressing hierarchies</a></li><li class="tocline"><a href="#machine-readable-and-application-independent-representation-of-statistics-1" class="tocxref"><span class="secno">4.1.6 </span>Machine-readable and application-independent representation of
- statistics</a></li><li class="tocline"><a href="#expressing-aggregation-relationships-in-data-cube" class="tocxref"><span class="secno">4.1.7 </span>Expressing aggregation relationships in Data Cube</a></li><li class="tocline"><a href="#scale---how-to-publish-large-amounts-of-statistical-data" class="tocxref"><span class="secno">4.1.8 </span>Scale - how to publish large amounts of statistical data</a></li><li class="tocline"><a href="#compliance-levels-or-criteria-for-well-formedness" class="tocxref"><span class="secno">4.1.9 </span>Compliance-levels or criteria for well-formedness</a></li><li class="tocline"><a href="#declaring-relations-between-cubes" class="tocxref"><span class="secno">4.1.10 </span>Declaring relations between Cubes</a></li></ul></li><li class="tocline"><a href="#consumption-requirements" class="tocxref"><span class="secno">4.2 </span>Consumption requirements</a><ul class="toc"><li class="tocline"><a href="#finding-statistical-data" class="tocxref"><span class="secno">4.2.1 </span>Finding statistical data</a></li><li class="tocline"><a href="#retrival-of-fine-grained-statistics" class="tocxref"><span class="secno">4.2.2 </span>Retrival of fine grained statistics</a></li><li class="tocline"><a href="#understanding---end-user-consumption-of-statistical-data" class="tocxref"><span class="secno">4.2.3 </span>Understanding - End user consumption of statistical data</a></li><li class="tocline"><a href="#comparing-and-trusting-statistics" class="tocxref"><span class="secno">4.2.4 </span>Comparing and trusting statistics</a></li><li class="tocline"><a href="#integration-of-statistics" class="tocxref"><span class="secno">4.2.5 </span>Integration of statistics</a></li><li class="tocline"><a href="#scale---how-to-consume-large-amounts-of-statistical-data" class="tocxref"><span class="secno">4.2.6 </span>Scale - how to consume large amounts of statistical data</a></li><li class="tocline"><a 
href="#common-internal-representation-of-statistics-to-be-exported-in-other-formats" class="tocxref"><span class="secno">4.2.7 </span>Common internal representation of statistics, to be exported
- in other formats</a></li><li class="tocline"><a href="#dealing-with-imperfect-statistics" class="tocxref"><span class="secno">4.2.8 </span>Dealing with imperfect statistics</a></li></ul></li></ul></li><li class="tocline"><a href="#acknowledgments" class="tocxref"><span class="secno">A. </span>Acknowledgments</a></li></ul></section>
-
-
-
- <section id="introduction">
- <!--OddPage--><h2><span class="secno">1. </span>Introduction</h2>
-
- <p>Many national, regional and local governments, as well as other
- organizations inside and outside of the public sector, create
- statistics. There is a need to publish those statistics in a
- standardized, machine-readable way on the web, so that statistics can
- be freely linked, integrated and reused in consuming applications.
- This document is a collection of use cases for a standard vocabulary
- to publish statistics as Linked Data.</p>
- </section>
-
-
- <section id="terminology">
- <!--OddPage--><h2><span class="secno">2. </span>Terminology</h2>
- <p>
- <dfn id="dfn-statistics">Statistics</dfn>
- is the <a href="http://en.wikipedia.org/wiki/Statistics">study</a> of
- the collection, organization, analysis, and interpretation of data. A
- statistic is a statistical dataset.
- </p>
-
- <p>
- A
- <dfn id="dfn-statistical-dataset">statistical dataset</dfn>
- comprises multidimensional data - a set of observed values organized
- along a group of dimensions, together with associated metadata. Basic
- structure of (aggregated) statistical data is a multidimensional table
- (also called a cube) <a href="#ref-SDMX">[SDMX]</a>.
- </p>
-
- <p>
- <dfn id="dfn-source-data">Source data</dfn>
- is data from datastores such as RDBs or spreadsheets that acts as a
- source for the Linked Data publishing process.
- </p>
-
- <p>
- <dfn id="dfn-metadata">Metadata</dfn>
- about statistics defines the data structure and gives contextual
- information about the statistics.
- </p>
-
- <p>
- A format is
- <dfn id="dfn-machine-readable">machine-readable</dfn>
- if it is amenable to automated processing by a machine, as opposed to
- presentation to a human user.
- </p>
-
- <p>
- A
- <dfn id="dfn-publisher">publisher</dfn>
- is a person or organization that exposes source data as Linked Data on
- the Web.
- </p>
-
- <p>
- A
- <dfn id="dfn-consumer">consumer</dfn>
- is a person or agent that uses Linked Data from the Web.
- </p>
-
- </section>
-
-
- <section id="use-cases">
- <!--OddPage--><h2><span class="secno">3. </span>Use cases</h2>
- <p>
- This section presents scenarios that would be enabled by the existence
- of a standard vocabulary for the representation of statistics as
- Linked Data. Since a draft of the specification of the cube vocabulary
- has been published, and the vocabulary already is in use, we will call
- this standard vocabulary after its current name RDF Data Cube
- vocabulary (short <a href="#ref-QB">[QB]</a>) throughout the document.
- </p>
- <p>We distinguish between use cases of publishing statistical data,
- and use cases of consuming statistical data since requirements for
- publishers and consumers of statistical data differ.</p>
- <section id="publishing-statistical-data">
- <h3><span class="secno">3.1 </span>Publishing statistical data</h3>
-
- <section id="publishing-general-statistics-in-a-machine-readable-and-application-independent-way-uc-1">
- <h4><span class="secno">3.1.1 </span>Publishing general statistics in a machine-readable and
- application-independent way (UC 1)</h4>
- <p>More and more organizations want to publish statistics on the
- web, for reasons such as increasing transparency and trust. Although
- in the ideal case, published data can be understood by both humans and
- machines, data often is simply published as CSV, PDF, XLS etc.,
- lacking elaborate metadata, which makes free usage and analysis
- difficult.</p>
-
- <p>The goal in this use case is to use a machine-readable and
- application-independent description of common statistics with use of
- open standards. The use case is fulfilled if QB will be a Linked Data
- vocabulary for encoding statistical data that has a hypercube
- structure and as such can describe common statistics in a
- machine-readable and application-independent way.</p>
-
- <p>
- An example scenario of this use case has been to publish the Combined
- Online Information System (<a href="http://data.gov.uk/resources/coins">COINS</a>). There, HM
- Treasury, the principal custodian of financial data for the UK
- government, released previously restricted information from its
- Combined Online Information System (COINS). Five data files were
- released containing between 3.3 and 4.9 million rows of data. The
- COINS dataset was translated into RDF for two reasons:
- </p>
-
- <ol>
- <li>The published statistics (e.g., as data files) are too large to
- load into widely available analysis tools such as Microsoft Excel, a
- common tool-of-choice for many data investigators.</li>
- <li>COINS is a highly technical information source, requiring
- both domain and technical skills to make useful applications around
- the data.</li>
- </ol>
- <p>Publishing statistics is challenging for several reasons:</p>
- <p>
- Representing observations and measurements requires more complex
- modeling as discussed by Martin Fowler <a href="#Fowler1997">[Fowler,
- 1997]</a>: Recording a statistic simply as an attribute to an object
- (e.g., the fact that a person weighs 185 pounds) fails to
- represent important concepts such as quantity, measurement, and
- observation.
- </p>
-
- <p>Quantity comprises necessary information to interpret the value,
- e.g., the unit and arithmetical and comparative operations; humans and
- machines can appropriately visualize such quantities or have
- conversions between different quantities.</p>
-
- <p>A Measurement separates a quantity from the actual event at
- which it was collected; a measurement assigns a quantity to a specific
- phenomenon type (e.g., strength). Also, a measurement can record
- metadata such as who did the measurement (person), and when was it
- done (time).</p>
-
- <p>Observations, eventually, abstract from measurements only
- recording numeric quantities. An Observation can also assign a
- category observation (e.g., blood group A) to an observation. The
- figure below demonstrates this relationship.</p>
- <p>
- </p><div class="fig">
- <a href="figures/modeling_quantity_measurement_observation.png"><img src="figures/modeling_quantity_measurement_observation.png" alt="Modeling quantity, measurement, observation"> </a>
- <div>Modeling quantity, measurement, observation</div>
- </div>
-
- <p></p>
-
- <p>QB deploys the multidimensional model (made of observations with
- Measures depending on Dimensions and Dimension Members, and further
- contextualized by Attributes) and should cater for this complexity in
- modelling.</p>
- <p>Another challenge is that for brevity reasons and to avoid
- repetition, it is useful to have abbreviation mechanisms such as
- assigning overall valid properties of observations at the dataset or
- slice level, and become implicitly part of each observation. For
- instance, in the case of COINS, all of the values are in thousands of
- pounds sterling. However, one of the use cases for the linked data
- version of COINS is to allow others to link to individual
- observations, which suggests that these observations should be
- standalone and self-contained – and should therefore have explicit
- multipliers and units on each observation. One suggestion is to author
- data without the duplication, but have the data publication tools
- "flatten" the compact representation into standalone observations
- during the publication process.</p>
- <p>A further challenge is related to slices of data. Slices of data
- group observations that are of special interest, e.g., slices of
- unemployment rates per year of a specific gender are suitable for
- direct visualization in a line diagram. However, depending on the
- number of Dimensions, the number of possible slices can become large
- which makes it difficult to select all interesting slices. Therefore,
- and because of their additional complexity, not many publishers create
- slices. In fact, it is somewhat unclear at this point which slices
- through the data will be useful to (COINS-RDF) users.</p>
- <p>Unanticipated Uses (optional): -</p>
- <p>Existing Work (optional): -</p>
-
- </section> <section id="publishing-one-or-many-ms-excel-spreadsheet-files-with-statistical-data-on-the-web-uc-2">
- <h4><span class="secno">3.1.2 </span>Publishing one or many MS excel spreadsheet files with
- statistical data on the web (UC 2)</h4>
- <p>Not only in government, there is a need to publish considerable
- amounts of statistical data to be consumed in various (also
- unexpected) application scenarios. Typically, Microsoft Excel sheets
- are made available for download. Those excel sheets contain single
- spreadsheets with several multidimensional data tables, having a name
- and notes, as well as column values, row values, and cell values.</p>
- <p>The goal in this use case is to publish spreadsheet
- information in a machine-readable format on the web, e.g., so that
- crawlers can find spreadsheets that use a certain column value. The
- published data should represent and make available for queries the
- most important information in the spreadsheets, e.g., rows, columns,
- and cell values. QB should provide the level of detail that is needed
- for such a transformation in order to fulfil this use case.</p>
- <p>In a possible use case scenario an institution wants to develop
- or use a software that transforms their excel sheets into the
- appropriate format.</p>
-
- <p class="editorsnote">@@TODO: Concrete example needed.</p>
- <p>Challenges of this use case are:</p>
- <ul>
- <li>Excel sheets provide much flexibility in arranging
- information. It may be necessary to limit this flexibility to allow
- automatic transformation.</li>
- <li>There may be many spreadsheets.</li>
- <li>Semi-structured information, e.g., notes about lineage of
- data cells, may not be possible to be formalized.</li>
- </ul>
- <p>Unanticipated Uses (optional): -</p>
- <p>
- Existing Work (optional): Stats2RDF uses OntoWiki to translate CSV
- into QB <a href="http://aksw.org/Projects/Stats2RDF">[Stats2RDF]</a>.
- </p>
-
- </section> <section id="publishing-sdmx-as-linked-data-uc-3">
- <h4><span class="secno">3.1.3 </span>Publishing SDMX as Linked Data (UC 3)</h4>
- <p>The ISO standard for exchanging and sharing statistical data and
- metadata among organizations is Statistical Data and Metadata eXchange
- (SDMX). Since this standard has proven applicable in many contexts, QB
- is designed to be compatible with the multidimensional model that
- underlies SDMX.</p>
- <p class="editorsnote">@@TODO: The QB spec should maybe also use
- the term "multidimensional model" instead of the less clear "cube
- model" term.</p>
- <p>Therefore, it should be possible to re-publish SDMX data using
- QB.</p>
- <p>
- The scenario for this use case is Eurostat <a href="http://epp.eurostat.ec.europa.eu/">[EUROSTAT]</a>, which
- publishes large amounts of European statistics coming from a data
- warehouse as SDMX and other formats on the web. Eurostat also provides
- an interface to browse and explore the datasets. However, linking such
- multidimensional data to related data sets and concepts would require
- download of interesting datasets and manual integration.
- </p>
- <p>The goal of this use case is to improve integration with other
- datasets; Eurostat data should be published on the web in a
- machine-readable format, possible to be linked with other datasets,
- and possible to be freely consumed by applications. This use case is
- fulfilled if QB can be used for publishing the data from Eurostat as
- Linked Data for integration.</p>
- <p>A publisher wants to make available Eurostat data as Linked
- Data. The statistical data shall be published as is. It is not
- necessary to represent information for validation. Data is read from
- tsv only. There are two concrete examples of this use case: Eurostat
- Linked Data Wrapper (http://estatwrap.ontologycentral.com/), and
- Linked Statistics Eurostat Data
- (http://eurostat.linked-statistics.org/). They have slightly different
- focus (e.g., with respect to completeness, performance, and agility).
- </p>
- <p>Challenges of this use case are:</p>
- <ul>
- <li>There are large amounts of SDMX data; the Eurostat dataset
- comprises 350 GB of data. This may influence decisions about toolsets
- and architectures to use. One important task is to decide whether to
- structure the data in separate datasets.</li>
- <li>Again, the question comes up whether slices are useful.</li>
- </ul>
- <p>Unanticipated Uses (optional): -</p>
- <p>Existing Work (optional): -</p>
- </section> <section id="publishing-sensor-data-as-statistics-uc-4">
- <h4><span class="secno">3.1.4 </span>Publishing sensor data as statistics (UC 4)</h4>
- <p>Typically, multidimensional data is aggregated. However, there
- are cases where non-aggregated data needs to be published, e.g.,
- observational, sensor network and forecast data sets. Such raw data
- may be available in RDF, already, but using a different vocabulary.</p>
- <p>The goal of this use case is to demonstrate that publishing of
- aggregate values or of raw data should not make much of a difference
- in QB.</p>
- <p>
- For example the Environment Agency uses it to publish (at least
- weekly) information on the quality of bathing waters around England
- and Wales <a href="http://www.epimorphics.com/web/wiki/bathing-water-quality-structure-published-linked-data">[EnvAge]</a>.
- In another scenario DERI tracks measurements about printing for a
- sustainability report. In the DERI scenario, raw data (number of
- printouts per person) is collected, then aggregated on a unit level,
- and then modelled using QB.
- </p>
- <p>Problems and Limitations:</p>
- <ul>
- <li>This use case also shall demonstrate how to link statistics
- with other statistics or non-statistical data (metadata).</li>
- </ul>
- <p>Unanticipated Uses (optional): -</p>
- <p>
- Existing Work (optional): Semantic Sensor Network ontology <a href="http://purl.oclc.org/NET/ssnx/ssn">[SSN]</a> already provides a
- way to publish sensor information. SSN data provides statistical
- Linked Data and grounds its data to the domain, e.g., sensors that
- collect observations (e.g., sensors measuring average of temperature
- over location and time). A number of organizations, particularly in
- the Climate and Meteorological area already have some commitment to
- the OGC "Observations and Measurements" (O&amp;M) logical data model, also
- published as ISO 19156. The QB spec should maybe also prefer the term
- "multidimensional model" instead of the less clear "cube model" term.
-
-
-
- </p><p class="editorsnote">@@TODO: Are there any statements about
- compatibility and interoperability between O&amp;M and Data Cube that can
- be made to give guidance to such organizations?</p>
- <p></p>
- </section> <section id="registering-statistical-data-in-dataset-catalogs-uc-5">
- <h4><span class="secno">3.1.5 </span>Registering statistical data in dataset catalogs (UC 5)</h4>
- <p>
- After statistics have been published as Linked Data, the question
- remains how to communicate the publication and let users find the
- statistics. There are catalogs to register datasets, e.g., CKAN, <a href="http://www.datacite.org/datacite.org">datacite.org</a>, <a href="http://www.gesis.org/dara/en/home/?lang=en">da|ra</a>, and <a href="http://pangaea.de/">Pangaea</a>. Those catalogs require specific
- configurations to register statistical data.
- </p>
- <p>The goal of this use case is to demonstrate how to expose and
- distribute statistics after modeling using QB. For instance, to allow
- automatic registration of statistical data in such catalogs, for
- finding and evaluating datasets. To solve this issue, it should be
- possible to transform QB data into formats that can be used by data
- catalogs.</p>
-
- <p class="editorsnote">@@TODO: Find specific use case scenario or
- ask how other publishers of QB data have dealt with this issue. Maybe
- relation to DCAT?</p>
- <p>Problems and Limitations: -</p>
- <p>Unanticipated Uses (optional): If data catalogs contain
- statistics, they do not expose those using Linked Data but for
- instance using CSV or HTML (Pangaea [11]). It could also be a use case
- to publish such data using QB.</p>
- <p>Existing Work (optional): -</p>
- </section> <section id="making-transparent-transformations-on-or-different-versions-of-statistical-data-uc-6">
- <h4><span class="secno">3.1.6 </span>Making transparent transformations on or different versions of
- statistical data (UC 6)</h4>
- <p>Statistical data often is used and further transformed for
- analysis and reporting. There is the risk that data has been
- incorrectly transformed so that the result is not interpretable any
- more. Therefore, if statistical data has been derived from other
- statistical data, this should be made transparent.</p>
- <p>The goal of this use case is to describe provenance and
- versioning around statistical data, so that the history of statistics
- published on the web becomes clear. This may also relate to the issue
- of having relationships between datasets published using QB. To fulfil
- this use case QB should recommend specific approaches to transforming
- and deriving of datasets which can be tracked and stored with the
- statistical data.</p>
-
- <p>A simple specific use case is that the Welsh Assembly government
- publishes a variety of population datasets broken down in different
- ways. For many uses the population broken down by some category (e.g.
- ethnicity) is expressed as a percentage. Separate datasets give the
- actual counts per category and aggregate counts. In such cases it is
- common to talk about the denominator (often DENOM) which is the
- aggregate count against which the percentages can be interpreted.</p>
- <p>Challenges of this use case are:</p>
- <ul>
- <li>Operations on statistical data result in new statistical
- data, depending on the operation. For instance, in terms of Data Cube,
- operations such as slice, dice, roll-up, drill-down will result in
- new Data Cubes. This may require representing general relationships
- between cubes (as discussed here: [12]).</li>
- <li>Should Data Cube support explicit declaration of such
- relationships either between separated qb:DataSets or between
- measures with a single qb:DataSet (e.g. ex:populationCount and
- ex:populationPercent)?</li>
- <li>If so should that be scoped to simple, common relationships
- like DENOM or allow expression of arbitrary mathematical relations?</li>
- </ul>
- <p>Unanticipated Uses (optional): -</p>
- <p>Existing Work (optional): Possible relation to Best Practices
- part on Versioning [13], where it is specified how to publish data
- which has multiple versions.</p>
-
-
- </section></section> <section id="consuming-published-statistical-data">
- <h3><span class="secno">3.2 </span>Consuming published statistical data</h3>
-
- <section id="simple-chart-visualizations-of-integrated-published-statistical-datasets-uc-7">
- <h4><span class="secno">3.2.1 </span>Simple chart visualizations of (integrated) published
- statistical datasets (UC 7)</h4>
- <p>Data that is published on the Web is typically visualized by
- transforming it manually into CSV or Excel and then creating a
- visualization on top of these formats using Excel, Tableau,
- RapidMiner, Rattle, Weka etc.</p>
- <p>This use case shall demonstrate how statistical data published
- on the web can be directly visualized, without using commercial or
- highly-complex tools. This use case is fulfilled if data that is
- published in QB can be directly visualized inside a webpage.</p>
- <p>An example scenario is environmental research done within the
- SMART research project (http://www.iwrm-smart.org/). Here, statistics
- about environmental aspects (e.g., measurements about the climate in
- the Lower Jordan Valley) shall be visualized for scientists and
- decision makers. Statistics should also be possible to be integrated
- and displayed together. The data is available as XML files on the web.
- On a separate website, specific parts of the data shall be queried and
- visualized in simple charts, e.g., line diagrams. The following figure
- shows the wanted display of an environmental measure over time for
- three regions in the lower Jordan valley; displayed inside a web page:</p>
-
- <p>
- </p><div class="fig">
- <a href="figures/Level_above_msl_3_locations.png"><img width="800px" src="figures/Level_above_msl_3_locations.png" alt="Line chart visualization of QB data"> </a>
- <div>Line chart visualization of QB data</div>
- </div>
-
- <p></p>
-
- <p>The following figure shows the same measures in a pivot table.
- Here, the aggregate COUNT of measures per cell is given.</p>
-
- <p>
- </p><div class="fig">
- <a href="figures/pivot_analysis_measurements.PNG"><img src="figures/pivot_analysis_measurements.PNG" alt="Pivot analysis measurements"> </a>
- <div>Pivot analysis measurements</div>
- </div>
-
- <p></p>
-
- <p>The use case uses Google App Engine, Qcrumb.com, and Spark. An
- example of a line diagram is given at [14] (some loading time needed).
- Current work tries to integrate current datasets with additional data
- sources, and then having queries that take data from both datasets and
- display them together.</p>
- <p>Challenges of this use case are:</p>
- <ul>
- <li>The difficulties lie in structuring the data appropriately so
- that the specific information can be queried.</li>
- <li>Also, data shall be published with potential
- integration in mind. Therefore, e.g., units of measurements need to
- be represented.</li>
- <li>Integration becomes much more difficult if publishers use
- different measures, dimensions.</li>
-
- </ul>
- <p>Unanticipated Uses (optional): -</p>
- <p>Existing Work (optional): -</p>
- </section> <section id="uploading-published-statistical-data-in-google-public-data-explorer-uc-8">
- <h4><span class="secno">3.2.2 </span>Uploading published statistical data in Google Public Data
- Explorer (UC 8)</h4>
- <p>Google Public Data Explorer (GPDE -
- http://code.google.com/apis/publicdata/) provides an easy possibility
- to visualize and explore statistical data. Data needs to be in the
- Dataset Publishing Language (DSPL -
- https://developers.google.com/public-data/overview) to be uploaded to
- the data explorer. A DSPL dataset is a bundle that contains an XML
- file, the schema, and a set of CSV files, the actual data. Google
- provides a tutorial to create a DSPL dataset from your data, e.g., in
- CSV. This requires a good understanding of XML, as well as a good
- understanding of the data that shall be visualized and explored.</p>
- <p>In this use case, it shall be demonstrated how to take any
- published QB dataset and to transform it automatically into DSPL for
- visualization and exploration. A dataset that is published conforming
- to QB will provide the level of detail that is needed for such a
- transformation.</p>
- <p>In an example scenario, a publisher P has published data using
- QB. There are two different ways to fulfil this use case: 1) A
- customer C is downloading this data into a triple store; SPARQL
- queries on this data can be used to transform the data into DSPL and
- uploaded and visualized using GPDE. 2) Or, one or more XSLT
- transformations on the RDF/XML transform the data into DSPL.</p>
- <p>Challenges of this use case are:</p>
- <ul>
- <li>The technical challenges for the consumer here lie in knowing
- where to download what data and how to get it transformed into DSPL
- without knowing the data.</li>
- </ul>
- <p>Unanticipated Uses (optional): DSPL is representative for using
- statistical data published on the web in available tools for
- analysis. Similar tools that may be automatically covered are: Weka
- (arff data format), Tableau, etc.</p>
- <p>Existing Work (optional): -</p>
- </section> <section id="allow-online-analytical-processing-on-published-datasets-of-statistical-data-uc-9">
- <h4><span class="secno">3.2.3 </span>Allow Online Analytical Processing on published datasets of
- statistical data (UC 9)</h4>
- <p>Online Analytical Processing [15] is an analysis method on
- multidimensional data. It is an explorative analysis method that
- allows users to interactively view the data on different angles
- (rotate, select) or granularities (drill-down, roll-up), and filter it
- for specific information (slice, dice).</p>
- <p>The multidimensional model used in QB to model statistics should
- be usable by OLAP systems. More specifically, data that conforms to QB
- can be used to define a Data Cube within an OLAP engine and can then
- be queried by OLAP clients.</p>
- <p>An example scenario of this use case is the Financial
- Information Observation System (FIOS) [16], where XBRL data has been
- re-published using QB and made analysable for stakeholders in a
- web-based OLAP client. The following figure shows an example of using
- FIOS. Here, for three different companies, cost of goods sold as
- disclosed in XBRL documents are analysed. As cell values either the
- number of disclosures or - if only one available - the actual number
- in USD is given:</p>
-
- <p>
- </p><div class="fig">
- <a href="figures/FIOS_example.PNG"><img src="figures/FIOS_example.PNG" alt="OLAP of QB data"> </a>
- <div>OLAP of QB data</div>
- </div>
-
- <p></p>
- <p>Challenges of this use case are:</p>
- <ul>
- <li>A problem lies in the strict separation between queries for
- the structure of data, and queries for actual aggregated values.</li>
- <li>Another problem lies in defining Data Cubes without greater
- insight in the data beforehand.</li>
- <li>Depending on the expressivity of the OLAP queries (e.g.,
- aggregation functions, hierarchies, ordering), performance plays an
- important role.</li>
- <li>QB allows flexibility in describing statistics, e.g., in
- order to reduce redundancy of information in single observations.
- These alternatives make general consumption of QB data more complex.
- Also, it is not clear, what "conforms" to QB means, e.g., is a
- qb:DataStructureDefinition required?</li>
- </ul>
- <p>Unanticipated Uses (optional): -</p>
- <p>Existing Work (optional): -</p>
- </section> <section id="transforming-published-statistics-into-xbrl-uc-10">
- <h4><span class="secno">3.2.4 </span>Transforming published statistics into XBRL (UC 10)</h4>
- <p>XBRL is a standard data format for disclosing financial
- information. Typically, financial data is not managed within the
- organization using XBRL but instead, internal formats such as excel or
- relational databases are used. If different data sources are to be
- summarized in XBRL data formats to be published, an internally-used
- standard format such as QB could help integrate and transform the data
- into the appropriate format.</p>
- <p>In this use case data that is available as data conforming to QB
- should also be possible to be automatically transformed into such XBRL
- data format. This use case is fulfilled if QB contains necessary
- information to derive XBRL data.</p>
- <p>In an example scenario, DERI has had a use case to publish
- sustainable IT information as XBRL to the Global Reporting Initiative
- (GRI - https://www.globalreporting.org/). Here, raw data (number of
- printouts per person) is collected, then aggregated on a unit level
- and modelled using QB. QB data shall then be used directly to fill-in
- XBRL documents that can be published to the GRI.</p>
- <p>Challenges of this use case are:</p>
- <ul>
- <li>So far, QB data has been transformed into semantic XBRL, a
- vocabulary closer to XBRL. There is the chance that certain
- information required in a GRI XBRL document cannot be encoded using a
- vocabulary as general as QB. In this case, QB could be used in
- concordance with semantic XBRL.</li>
- </ul>
- <p class="editorsnote">@@TODO: Add link to semantic XBRL.</p>
- <p>Unanticipated Uses (optional): -</p>
- <p>Existing Work (optional): -</p>
-
- </section> </section></section>
- <section id="requirements">
- <!--OddPage--><h2><span class="secno">4. </span>Requirements</h2>
-
- <p>The use cases presented in the previous section give rise to the
- following requirements for a standard representation of statistics.
- Requirements are cross-linked with the use cases that motivate them.
- Requirements are similarly categorized as deriving from publishing or
- consuming use cases.</p>
-
- <section id="publishing-requirements">
- <h3><span class="secno">4.1 </span>Publishing requirements</h3>
-
- <section id="machine-readable-and-application-independent-representation-of-statistics">
- <h4><span class="secno">4.1.1 </span>Machine-readable and application-independent representation of
- statistics</h4>
- <p>It should be possible to add abstraction, multiple levels of
- description, summaries of statistics.</p>
-
- <p>Required by: UC1, UC2, UC3, UC4</p>
- </section> <section id="representing-statistics-from-various-resource">
- <h4><span class="secno">4.1.2 </span>Representing statistics from various resource</h4>
- <p>Statistics from various resource data should be possible to be
- translated into QB. QB should be very general and should be usable for
- other data sets such as survey data, spreadsheets and OLAP data cubes.
- What kind of statistics are described: simple CSV tables (UC 1), excel
- (UC 2) and more complex SDMX (UC 3) data about government statistics
- or other public-domain relevant data.</p>
-
- <p>Required by: UC1, UC2, UC3</p>
- </section> <section id="communicating-exposing-statistics-on-the-web">
- <h4><span class="secno">4.1.3 </span>Communicating, exposing statistics on the web</h4>
- <p>It should become clear how to make statistical data available on
- the web, including how to expose it, and how to distribute it.</p>
-
- <p>Required by: UC5</p>
- </section> <section id="coverage-of-typical-statistics-metadata">
- <h4><span class="secno">4.1.4 </span>Coverage of typical statistics metadata</h4>
- <p>It should be possible to add metainformation to statistics as
- found in typical statistics or statistics catalogs.</p>
-
- <p>Required by: UC1, UC2, UC3, UC4, UC5</p>
- </section> <section id="expressing-hierarchies">
- <h4><span class="secno">4.1.5 </span>Expressing hierarchies</h4>
- <p>It should be possible to express hierarchies on Dimensions of
- statistics. Some of this requirement is met by the work on ISO
- Extension to SKOS [17].</p>
-
- <p>Required by: UC3, UC9</p>
- </section> <section id="machine-readable-and-application-independent-representation-of-statistics-1">
- <h4><span class="secno">4.1.6 </span>Machine-readable and application-independent representation of
- statistics</h4>
- <p>It should be possible to add abstraction, multiple levels of
- description, summaries of statistics.</p>
-
- <p>Required by: UC1, UC2, UC3, UC4</p>
- </section> <section id="expressing-aggregation-relationships-in-data-cube">
- <h4><span class="secno">4.1.7 </span>Expressing aggregation relationships in Data Cube</h4>
- <p>Based on [18]: It often comes up in statistical data that you
- have some kind of 'overall' figure, which is then broken down into
- parts. Supposing I have a set of population observations, expressed
- with the Data Cube vocabulary - something like (in pseudo-turtle):</p>
- <pre>ex:obs1
- sdmx:refArea &lt;uk&gt;;
- sdmx:refPeriod "2011";
- ex:population "60" .
-
-ex:obs2
- sdmx:refArea &lt;england&gt;;
- sdmx:refPeriod "2011";
- ex:population "50" .
-
-ex:obs3
- sdmx:refArea &lt;scotland&gt;;
- sdmx:refPeriod "2011";
- ex:population "5" .
-
-ex:obs4
- sdmx:refArea &lt;wales&gt;;
- sdmx:refPeriod "2011";
- ex:population "3" .
-
-ex:obs5
- sdmx:refArea &lt;northernireland&gt;;
- sdmx:refPeriod "2011";
- ex:population "2" .
- </pre>
- <p>What is the best way (in the context of the RDF/Data Cube/SDMX
- approach) to express that the values for the England/Scotland/Wales/
- Northern Ireland ought to add up to the value for the UK and
- constitute a more detailed breakdown of the overall UK figure? I might
- also have population figures for France, Germany, EU27, etc...so it's
- not as simple as just taking a qb:Slice where you fix the time period
- and the measure.</p>
- <p>Some of this requirement is met by the work on ISO Extension to
- SKOS [19].</p>
-
-
- <p>Required by: UC1, UC2, UC3, UC9</p>
- </section> <section id="scale---how-to-publish-large-amounts-of-statistical-data">
- <h4><span class="secno">4.1.8 </span>Scale - how to publish large amounts of statistical data</h4>
- <p>Publishers that are restricted by the size of the statistics
- they publish, shall have possibilities to reduce the size or remove
- redundant information. Scalability issues can both arise with
- people's effort and performance of applications.</p>
-
- <p>Required by: UC1, UC2, UC3, UC4</p>
- </section> <section id="compliance-levels-or-criteria-for-well-formedness">
- <h4><span class="secno">4.1.9 </span>Compliance-levels or criteria for well-formedness</h4>
- <p>The formal RDF Data Cube vocabulary expresses few formal
- semantic constraints. Furthermore, in RDF the omission of
- otherwise-expected properties on resources does not lead to any formal
- inconsistencies. However, to build reliable software to process Data
- Cubes then data consumers need to know what assumptions they can make
- about a dataset purporting to be a Data Cube.</p>
- <p>What *well-formedness* criteria should Data Cube publishers
- conform to? Specific areas which may need explicit clarification in
- the well-formedness criteria include (but may not be limited to):</p>
- <ul>
- <li>use of abbreviated data layout based on attachment levels</li>
- <li>use of qb:Slice when (completeness, requirements for an
- explicit qb:SliceKey?)</li>
- <li>avoiding mixing two approaches to handling multiple-measures
- </li>
- <li>optional triples (e.g. type triples)</li>
- </ul>
-
- <p>Required by all use cases.</p>
- </section> <section id="declaring-relations-between-cubes">
- <h4><span class="secno">4.1.10 </span>Declaring relations between Cubes</h4>
- <p>In some situations statistical data sets are used to derive
- further datasets. Should Data Cube be able to explicitly convey these
- relationships?</p>
- <p>Note that there has been some work towards this within the SDMX
- community as indicated here:
- http://groups.google.com/group/publishing-statistical-data/msg/b3fd023d8c33561d</p>
-
- <p>Required by: UC6</p>
- </section> </section> <section id="consumption-requirements">
- <h3><span class="secno">4.2 </span>Consumption requirements</h3>
-
- <section id="finding-statistical-data">
- <h4><span class="secno">4.2.1 </span>Finding statistical data</h4>
- <p>Finding statistical data should be possible, perhaps through an
- authoritative service</p>
-
- <p>Required by: UC5</p>
- </section> <section id="retrival-of-fine-grained-statistics">
- <h4><span class="secno">4.2.2 </span>Retrieval of fine grained statistics</h4>
- <p>Query formulation and execution mechanisms. It should be
- possible to use SPARQL to query for fine grained statistics.</p>
-
- <p>Required by: UC1, UC2, UC3, UC4, UC5, UC6, UC7</p>
- </section> <section id="understanding---end-user-consumption-of-statistical-data">
- <h4><span class="secno">4.2.3 </span>Understanding - End user consumption of statistical data</h4>
- <p>Must allow presentation and visualization.</p>
-
- <p>Required by: UC7, UC8, UC9, UC10</p>
- </section> <section id="comparing-and-trusting-statistics">
- <h4><span class="secno">4.2.4 </span>Comparing and trusting statistics</h4>
- <p>Must allow finding what's in common in the statistics of two or
- more datasets. This requirement also deals with information quality -
- assessing statistical datasets - and trust - making trust judgements
- on statistical data.</p>
-
- <p>Required by: UC5, UC6, UC9</p>
- </section> <section id="integration-of-statistics">
- <h4><span class="secno">4.2.5 </span>Integration of statistics</h4>
- <p>Interoperability - combining statistics produced by multiple
- different systems. It should be possible to combine two statistics
- that contain related data, and possibly were published independently.
- It should be possible to implement value conversions.</p>
-
- <p>Required by: UC1, UC3, UC4, UC7, UC9, UC10</p>
- </section> <section id="scale---how-to-consume-large-amounts-of-statistical-data">
- <h4><span class="secno">4.2.6 </span>Scale - how to consume large amounts of statistical data</h4>
- <p>Consumers that want to access large amounts of statistical data
- need guidance.</p>
-
- <p>Required by: UC7, UC9</p>
- </section> <section id="common-internal-representation-of-statistics-to-be-exported-in-other-formats">
- <h4><span class="secno">4.2.7 </span>Common internal representation of statistics, to be exported
- in other formats</h4>
- <p>QB data should be possible to be transformed into data formats
- such as XBRL which are required by certain institutions.</p>
-
- <p>Required by: UC10</p>
- </section> <section id="dealing-with-imperfect-statistics">
- <h4><span class="secno">4.2.8 </span>Dealing with imperfect statistics</h4>
- <p>Imperfections - reasoning about statistical data that is not
- complete or correct.</p>
-
- <p>Required by: UC7, UC8, UC9, UC10</p>
- </section> </section> </section>
- <section class="appendix" id="acknowledgments">
- <!--OddPage--><h2><span class="secno">A. </span>Acknowledgments</h2>
- <p>The editors are very thankful for comments and suggestions ...</p>
- </section>
-
- <h2 id="references">References</h2>
-
- <dl>
- <dt id="ref-SDMX">[SDMX]</dt>
- <dd>
- SDMX - User Guide 2009, <a href="http://sdmx.org/wp-content/uploads/2009/02/sdmx-userguide-version2009-1-71.pdf">http://sdmx.org/wp-content/uploads/2009/02/sdmx-userguide-version2009-1-71.pdf</a>
- </dd>
-
- <dt id="ref-Fowler1997">[Fowler1997]</dt>
- <dd>Fowler, Martin (1997). Analysis Patterns: Reusable Object
- Models. Addison-Wesley. ISBN 0201895420.</dd>
-
- <dt id="ref-QB">[QB]</dt>
- <dd>
- RDF Data Cube vocabulary, <a href="http://dvcs.w3.org/hg/gld/raw-file/default/data-cube/index.html">http://dvcs.w3.org/hg/gld/raw-file/default/data-cube/index.html</a>
- </dd>
-
- <dt id="ref-OLAP">[OLAP]</dt>
- <dd>
- Online Analytical Processing Data Cubes, <a href="http://en.wikipedia.org/wiki/OLAP_cube">http://en.wikipedia.org/wiki/OLAP_cube</a>
- </dd>
-
- <dt id="ref-linked-data">[LOD]</dt>
- <dd>
- Linked Data, <a href="http://linkeddata.org/">http://linkeddata.org/</a>
- </dd>
-
- <dt id="ref-rdf">[RDF]</dt>
- <dd>
- Resource Description Framework, <a href="http://www.w3.org/RDF/">http://www.w3.org/RDF/</a>
- </dd>
-
- <dt id="ref-scovo">[SCOVO]</dt>
- <dd>
- The Statistical Core Vocabulary, <a href="http://sw.joanneum.at/scovo/schema.html">http://sw.joanneum.at/scovo/schema.html</a>
- <br> SCOVO: Using Statistics on the Web of data, <a href="http://sw-app.org/pub/eswc09-inuse-scovo.pdf">http://sw-app.org/pub/eswc09-inuse-scovo.pdf</a>
- </dd>
-
- <dt id="ref-skos">[SKOS]</dt>
- <dd>
- Simple Knowledge Organization System, <a href="http://www.w3.org/2004/02/skos/">http://www.w3.org/2004/02/skos/</a>
- </dd>
-
- <dt id="ref-cog">[COG]</dt>
- <dd>
- SDMX Content Oriented Guidelines, <a href="http://sdmx.org/?page_id=11">http://sdmx.org/?page_id=11</a>
- </dd>
-
- </dl>
-
-
-</body></html>
\ No newline at end of file