Simplify whitespace rules for N-Quads
author Gavin Carothers <gavin@carothers.name>
Tue, 27 Aug 2013 15:24:45 -0700
changeset 1016 28525922b4fb
parent 1015 666998a12314
child 1017 6080497ca9d7
Simplify whitespace rules for N-Quads
nquads/index.html
nquads/nquads-bnf.html
nquads/nquads.bnf
--- a/nquads/index.html	Tue Aug 27 15:21:02 2013 -0700
+++ b/nquads/index.html	Tue Aug 27 15:24:45 2013 -0700
@@ -189,6 +189,8 @@
           <h3>Grammar</h3>
           <p>A N-Quads document is a Unicode[[!UNICODE]] character string encoded in UTF-8.
           Unicode codepoints only in the range U+0 to U+10FFFF inclusive are allowed.</p>
+          <p>White space (tab <code>U+0009</code> or space <code>U+0020</code>) is used to separate two terminals which would otherwise be (mis-)recognized as one terminal. White space is significant in the production <a href="#grammar-production-STRING_LITERAL_QUOTE">STRING_LITERAL_QUOTE</a>.</p>
+          <p>Comments in N-Quads take the form of '<code>#</code>', outside an <code>IRIREF</code> or <code>STRING_LITERAL_QUOTE</code>, and continue to the end of line (<code>EOL</code>) or end of file if there is no end of line after the comment marker. Comments are treated as white space.</p>
           <p>The <abbr title="Extended Backus–Naur Form">EBNF</abbr> used here is defined in XML 1.0
             [[!EBNF-NOTATION]].</p>
           <p>Escape sequence rules are the same as Turtle
--- a/nquads/nquads-bnf.html	Tue Aug 27 15:21:02 2013 -0700
+++ b/nquads/nquads-bnf.html	Tue Aug 27 15:24:45 2013 -0700
@@ -6,11 +6,11 @@
     <td>::=</td>
     <td><a href='#grammar-production-statement'>statement</a>? (<a href='#grammar-production-EOL'>EOL</a> <a href='#grammar-production-statement'>statement</a>)<code class='grammar-star'>*</code> <a href='#grammar-production-EOL'>EOL</a>?</td>
 </tr>
-            <tr id="grammar-production-statement" data-grammar-original="[2]  statement          ::= WS* subject WS+ predicate WS+ object (WS+ graphLabel)? WS* &#x27;.&#x27; WS*" data-grammar-expression="(&#x27;,&#x27;, [(&#x27;*&#x27;, (&#x27;id&#x27;, &#x27;WS&#x27;)), (&#x27;id&#x27;, &#x27;subject&#x27;), (&#x27;+&#x27;, (&#x27;id&#x27;, &#x27;WS&#x27;)), (&#x27;id&#x27;, &#x27;predicate&#x27;), (&#x27;+&#x27;, (&#x27;id&#x27;, &#x27;WS&#x27;)), (&#x27;id&#x27;, &#x27;object&#x27;), (&#x27;?&#x27;, (&#x27;,&#x27;, [(&#x27;+&#x27;, (&#x27;id&#x27;, &#x27;WS&#x27;)), (&#x27;id&#x27;, &#x27;graphLabel&#x27;)])), (&#x27;*&#x27;, (&#x27;id&#x27;, &#x27;WS&#x27;)), (&quot;&#x27;&quot;, &#x27;.&#x27;), (&#x27;*&#x27;, (&#x27;id&#x27;, &#x27;WS&#x27;))])" >
+            <tr id="grammar-production-statement" data-grammar-original="[2]  statement          ::= subject predicate object graphLabel? &#x27;.&#x27;" data-grammar-expression="(&#x27;,&#x27;, [(&#x27;id&#x27;, &#x27;subject&#x27;), (&#x27;id&#x27;, &#x27;predicate&#x27;), (&#x27;id&#x27;, &#x27;object&#x27;), (&#x27;?&#x27;, (&#x27;id&#x27;, &#x27;graphLabel&#x27;)), (&quot;&#x27;&quot;, &#x27;.&#x27;)])" >
     <td>[2]</td>
     <td><code>statement</code></td>
     <td>::=</td>
-    <td><a href='#grammar-production-WS'>WS</a><code class='grammar-star'>*</code> <a href='#grammar-production-subject'>subject</a> <a href='#grammar-production-WS'>WS</a><code class='grammar-plus'>+</code> <a href='#grammar-production-predicate'>predicate</a> <a href='#grammar-production-WS'>WS</a><code class='grammar-plus'>+</code> <a href='#grammar-production-object'>object</a> (<a href='#grammar-production-WS'>WS</a><code class='grammar-plus'>+</code> <a href='#grammar-production-graphLabel'>graphLabel</a>)? <a href='#grammar-production-WS'>WS</a><code class='grammar-star'>*</code> '<code class='grammar-literal'>.</code>' <a href='#grammar-production-WS'>WS</a><code class='grammar-star'>*</code></td>
+    <td><a href='#grammar-production-subject'>subject</a> <a href='#grammar-production-predicate'>predicate</a> <a href='#grammar-production-object'>object</a> <a href='#grammar-production-graphLabel'>graphLabel</a>? '<code class='grammar-literal'>.</code>'</td>
 </tr>
             <tr id="grammar-production-subject" data-grammar-original="[3]  subject            ::= IRIREF | BLANK_NODE_LABEL" data-grammar-expression="(&#x27;|&#x27;, [(&#x27;id&#x27;, &#x27;IRIREF&#x27;), (&#x27;id&#x27;, &#x27;BLANK_NODE_LABEL&#x27;)])" >
     <td>[3]</td>
@@ -42,7 +42,7 @@
     <td>::=</td>
     <td><a href='#grammar-production-STRING_LITERAL_QUOTE'>STRING_LITERAL_QUOTE</a> ('<code class='grammar-literal'>^^</code>' <a href='#grammar-production-IRIREF'>IRIREF</a> <code>| </code> '<code class='grammar-literal'>@</code>' <a href='#grammar-production-LANG'>LANG</a>)?</td>
 </tr>
-<tr><td colspan="5"><h4 id="terminals">Productions for terminals</h4></td></tr>
+<tr><td colspan="4"><h4 id="terminals">Productions for terminals</h4></td></tr>
             <tr id="grammar-production-LANGTAG" data-grammar-original="[144s] LANGTAG          ::= &quot;@&quot; [a-zA-Z]+ ( &quot;-&quot; [a-zA-Z0-9]+ )*" data-grammar-expression="(&#x27;,&#x27;, [(&quot;&#x27;&quot;, &#x27;@&#x27;), (&#x27;+&#x27;, (&#x27;[&#x27;, &#x27;a-zA-Z&#x27;)), (&#x27;*&#x27;, (&#x27;,&#x27;, [(&quot;&#x27;&quot;, &#x27;-&#x27;), (&#x27;+&#x27;, (&#x27;[&#x27;, &#x27;a-zA-Z0-9&#x27;))]))])" class='grammar-token'>
     <td>[144s]</td>
     <td><code>LANGTAG</code></td>
@@ -55,12 +55,6 @@
     <td>::=</td>
     <td>[<code class='grammar-chars'>#xD#xA</code>]<code class='grammar-plus'>+</code></td>
 </tr>
-            <tr id="grammar-production-WS" data-grammar-original="[9] WS                 ::= [#x20#x9]" data-grammar-expression="(&#x27;[&#x27;, &#x27;#x20#x9&#x27;)" class='grammar-token'>
-    <td>[9]</td>
-    <td><code>WS</code></td>
-    <td>::=</td>
-    <td>[<code class='grammar-chars'>#x20#x9</code>]</td>
-</tr>
             <tr id="grammar-production-IRIREF" data-grammar-original="[10] IRIREF ::=  &#x27;&lt;&#x27; ([^#x00-#x20&lt;&gt;&quot;{}|^`\] | UCHAR)* &#x27;&gt;&#x27;" data-grammar-expression="(&#x27;,&#x27;, [(&quot;&#x27;&quot;, &#x27;&lt;&#x27;), (&#x27;*&#x27;, (&#x27;|&#x27;, [(&#x27;[&#x27;, &#x27;^#x00-#x20&lt;&gt;&quot;{}|^`\\&#x27;), (&#x27;id&#x27;, &#x27;UCHAR&#x27;)])), (&quot;&#x27;&quot;, &#x27;&gt;&#x27;)])" class='grammar-token'>
     <td>[10]</td>
     <td><code>IRIREF</code></td>
--- a/nquads/nquads.bnf	Tue Aug 27 15:21:02 2013 -0700
+++ b/nquads/nquads.bnf	Tue Aug 27 15:24:45 2013 -0700
@@ -1,5 +1,5 @@
 [1]  nquadsDoc          ::= statement? (EOL statement)* EOL?
-[2]  statement          ::= WS* subject WS+ predicate WS+ object (WS+ graphLabel)? WS* '.' WS*
+[2]  statement          ::= subject predicate object graphLabel? '.'
 [3]  subject            ::= IRIREF | BLANK_NODE_LABEL
 [4]  predicate          ::= IRIREF 
 [5]  object             ::= IRIREF | BLANK_NODE_LABEL | literal
@@ -11,8 +11,6 @@
 [144s] LANGTAG          ::= "@" [a-zA-Z]+ ( "-" [a-zA-Z0-9]+ )* 
 
 [8]  EOL                ::= [#xD#xA]+
-[9] WS                 ::= [#x20#x9]
-
 [10] IRIREF ::=  '<' ([^#x00-#x20<>"{}|^`\] | UCHAR)* '>'
 [11] STRING_LITERAL_QUOTE ::= '"' ( [^#x22#x5C#xA#xD] | ECHAR | UCHAR )* '"'