~ played with henry's https://bitbucket.org/bblfish/nomo/src/8f6281106d5a/src/main/scala/rdf/NTriplesParser.scala default tip
author: Alexandre Bertails <bertails@gmail.com>
Sun, 05 Feb 2012 23:54:42 -0500
changeset 16 fd05df205029
parent 15 183f0a96bd7e
~ played with henry's https://bitbucket.org/bblfish/nomo/src/8f6281106d5a/src/main/scala/rdf/NTriplesParser.scala
aRDF/build-nomo
aRDF/jena/src/main/scala/JenaModel.scala
aRDF/jena/src/test/scala/JenaTest.scala
aRDF/n-triples-parser/lib/nomo.jar
aRDF/n-triples-parser/src/main/scala/NTriplesParser.scala
aRDF/project/build.scala
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/aRDF/build-nomo	Sun Feb 05 23:54:42 2012 -0500
@@ -0,0 +1,7 @@
+#!/bin/sh
+#
+# Clones and builds the nomo library, then copies the resulting jar into the
+# n-triples-parser module's lib directory. All paths are relative, so this is
+# meant to be run from the directory that contains n-triples-parser/.
+
+# Stop at the first failing command: without this a failed clone, cd or build
+# would still fall through to the cp below.
+set -e
+
+hg clone https://bitbucket.org/pchiusano/nomo
+cd nomo
+./sbt package
+# Make sure the destination is a directory; otherwise cp would create a plain
+# file named "lib". The source path is tied to the Scala version nomo is built with.
+mkdir -p ../n-triples-parser/lib
+cp target/scala-2.9.1.final/nomo.jar ../n-triples-parser/lib
+
--- a/aRDF/jena/src/main/scala/JenaModel.scala	Sun Feb 05 21:37:53 2012 -0500
+++ b/aRDF/jena/src/main/scala/JenaModel.scala	Sun Feb 05 23:54:42 2012 -0500
@@ -76,7 +76,7 @@
   case class IRI(iri: String) { override def toString = '"' + iri + '"' }
   object IRI extends AlgebraicDataType1[String, IRI]
 
-  type BNode = Node_Blank
+  type BNode = JenaNode // NOTE(review): no longer Node_Blank, so blank nodes and literals share one static type
   object BNode extends AlgebraicDataType1[String, BNode] {
     def apply(label: String): BNode = {
       val id = AnonId.create(label)
@@ -88,7 +88,7 @@
 
   lazy val mapper = TypeMapper.getInstance
   
-  type Literal = Node_Literal
+  type Literal = JenaNode // NOTE(review): no longer Node_Literal, so literals and blank nodes share one static type
   object Literal extends AlgebraicDataType3[String, Option[LangTag], Option[IRI], Literal] {
     def apply(lit: String, langtagOption: Option[LangTag], datatypeOption: Option[IRI]): Literal = {
       JenaNode.createLiteral(
--- a/aRDF/jena/src/test/scala/JenaTest.scala	Sun Feb 05 21:37:53 2012 -0500
+++ b/aRDF/jena/src/test/scala/JenaTest.scala	Sun Feb 05 23:54:42 2012 -0500
@@ -8,7 +8,7 @@
 import com.hp.hpl.jena.graph._
 import org.w3.rdf.jena._
 
-class JenaTest {
+class TransformerTest {
   
   @Test()
   def mytest(): Unit = {
@@ -31,4 +31,21 @@
     
   }
   
+}
+
+class NTriplesParserTest {
+
+  /**
+   * Runs the N-Triples parser, backed by JenaModel, over a three-statement
+   * document and prints the parse result. Nothing is asserted about it.
+   */
+  @Test()
+  def mytest(): Unit = {
+    val document =
+"""<http://www.w3.org/2001/sw/RDFCore/ntriples/> <http://purl.org/dc/elements/1.1/creator> "Dave Beckett" .
+<http://www.w3.org/2001/sw/RDFCore/ntriples/> <http://purl.org/dc/elements/1.1/creator> "Art Barstow" .
+<http://www.w3.org/2001/sw/RDFCore/ntriples/> <http://purl.org/dc/elements/1.1/publisher> <http://www.w3.org/> ."""
+
+    val ntParser = new NTriplesParser[JenaModel.type](JenaModel)
+
+    // NOTE(review): presumably required implicitly when the parser is applied
+    // to the input (the parser class declares the same implicit) -- confirm.
+    implicit val U: Unit = ()
+    val outcome = ntParser.ntriples(document).get
+    println(outcome)
+
+  }
 }
\ No newline at end of file
Binary file aRDF/n-triples-parser/lib/nomo.jar has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/aRDF/n-triples-parser/src/main/scala/NTriplesParser.scala	Sun Feb 05 23:54:42 2012 -0500
@@ -0,0 +1,133 @@
+package org.w3.rdf
+
+import nomo._
+import nomo.Errors.{TreeError, Single}
+
+/**
+ * Parser for the simplest of all RDF encodings: NTriples
+ * http://www.w3.org/TR/rdf-testcases/#ntriples
+ *
+ * The grammar is assembled from nomo parser combinators, and every RDF term
+ * it produces is built through the constructors of the supplied model `m`.
+ * The parsers are plain vals, initialised in declaration order, so each one
+ * has to be declared before the parsers that are built from it.
+ *
+ * @param m the model providing IRI, BNode, Literal, LangTag, Triple and the Node wrappers
+ * @tparam M the concrete model type
+ * @author bblfish
+ * @since 02/02/2012
+ */
+class NTriplesParser[M <: Model](val m: M) {
+  
+  import m._
+
+  //setup, should be in type
+  val P = Parsers(Monotypic.String, Errors.tree[Char], Accumulators.position[Unit](4))
+  implicit def toTreeError(msg: String): Errors.TreeError = Errors.Single(msg, None)
+  implicit val U: Unit = ()
+  //end setup
+
+  // characters accepted in a language tag (input is lower-cased before the check)
+  val alpha_digit_dash = "abcdefghijklmnopqrstuvwxyz0123456789-"
+
+  val lang = P.takeWhile1(c => alpha_digit_dash.contains(c.toLower),pos => Single("digit",Some(pos))).map(l => LangTag(l.get))
+
+  // `space` is blanks and tabs only; `anySpace` is any whitespace, line breaks included
+  val space = P.takeWhile( c => c == ' '|| c == '\t' )
+  val anySpace =  P.takeWhile(_.isWhitespace )
+
+  /** True for any character that may appear between the angle brackets of a uriRef. */
+  def isUriChar(c: Char) = ( ! c.isWhitespace) && c != '<' && c != '>'
+
+  // <...> : the text between the angle brackets becomes the IRI
+  val uriRef = ( P.single('<') >> P.takeWhile(isUriChar(_) ) << P.single('>')).map(i=>IRI(i.get))
+  import P.++
+  
+  // _:label : the letters and digits after the prefix become the blank node label
+  val bnode = P.word("_:")>>P.takeWhile(_.isLetterOrDigit).map (n=>BNode(n.get))
+
+  // \uXXXX : four hex digits naming a single UTF-16 char
+  val lit_u = (P.word("\\u")>> P.any++P.any++P.any++P.any) map {
+    case c1++c2++c3++c4 => Integer.parseInt(new String(Array(c1,c2,c3,c4)),16).toChar
+  }
+  // \UXXXXXXXX : eight hex digits naming a full code point. The result is a
+  // String because code points above U+FFFF need two UTF-16 chars; forcing the
+  // value through toChar would silently drop the high bits.
+  val lit_U = (P.word("\\U")>> P.any++P.any++P.any++P.any++P.any++P.any++P.any++P.any) map {
+    case c1++c2++c3++c4++c5++c6++c7++c8 =>
+      val codePoint = Integer.parseInt(new String(Array(c1,c2,c3,c4,c5,c6,c7,c8)),16)
+      new String(Character.toChars(codePoint))
+  }
+  val lt_tab = P.word("\\t").map(c=>0x9.toChar)
+  val lt_cr = P.word("\\r").map(c=>0xD.toChar)
+  val lt_nl = P.word("\\n").map(c=>0xA.toChar)
+  val lt_slash = P.word("\\\\").map(c=>"\\")
+  val lt_quote = P.word("\\\"").map(c=>'"'.toChar)
+
+  // Body of a quoted string: escape sequences interleaved with runs of ordinary
+  // characters. The run is unwrapped with .get, like every other takeWhile
+  // result in this class, so that the pieces can simply be concatenated.
+  val literal = ( lit_u | lit_U | lt_tab | lt_cr | lt_nl | lt_slash | lt_quote |
+      P.takeWhile1(c=> c!= '\\' && c != '"', pos => Single("no char!",Some(pos))).map(_.get)
+    ).many
+
+  val xsd = "http://www.w3.org/2001/XMLSchema#"
+  val rdf = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+  val xsdString = IRI(xsd + "string")
+
+  // "..." with no suffix: a literal typed as xsd:string
+  val plainLit = (P.single('"')>>literal<< P.word("\"")).map(l=> Literal(l.mkString, None, Some(xsdString)))
+
+  // Optional suffixes of a literal: ^^<datatype> yields Left, @lang yields Right.
+  // These must be initialised before fullLiteral, which combines them as soon as
+  // it is constructed; declared after it they would still be null at that point.
+  val typeFunc = (P.word("^^") >> uriRef).map(tpe => Left(tpe))
+  val langFunc = (P.word("@") >> lang ).map(lng=> Right(lng))
+
+  // NOTE(review): a language-tagged literal keeps the xsd:string datatype set by
+  // plainLit -- confirm that this is what the model expects.
+  val fullLiteral = plainLit ++ (typeFunc | langFunc).optional map {
+    case lit ++ None => lit
+    case Literal(lexicalForm, langtag, _) ++ Some(Left(tpe)) => Literal(lexicalForm, None, Some(tpe))
+    case Literal(lexicalForm, _, datatype) ++ Some(Right(langTag)) => Literal(lexicalForm, Some(langTag), datatype)
+  }
+
+  // Any term allowed in subject/object position, wrapped in the matching Node case.
+  val node = (uriRef | bnode | fullLiteral) map {
+    case n @ m.IRI(_) => NodeIRI(n)
+    case bn @ m.BNode(_) => NodeBNode(bn)
+    case lit @ m.Literal(_, _, _) => NodeLiteral(lit)
+  }
+  val pred = uriRef
+  val dot = P.single('.')
+
+  // one statement: node, predicate, node, then optional blanks and the closing dot
+  val sentence = (node++(space>>pred)++(space>>node)).map(s=>Triple(s._1._1,s._1._2,s._2)) << (space++dot)
+  // a whole document: statements separated by arbitrary whitespace
+  val ntriples = anySpace >> (sentence delimit anySpace )
+  
+
+}
+
+/**
+ * Helpers for writing N-Triples string literals: characters outside printable
+ * ASCII, as well as the quote and the backslash, are replaced by escapes.
+ */
+object NTripleParser {
+
+  val hexChar = Array( '0','1','2','3','4','5','6','7','8','9','A','B','C','D','E','F')
+
+  // Appends the lowest `digits` hex digits of `value`, most significant first.
+  private def appendHex(b: StringBuilder, value: Int, digits: Int): StringBuilder = {
+    var shift = (digits - 1) * 4
+    while (shift >= 0) {
+      b.append(hexChar((value >> shift) & 0xF))
+      shift -= 4
+    }
+    b
+  }
+
+  // Builds the eight-digit \UXXXXXXXX escape for a whole code point.
+  private def longEscape(codePoint: Int): StringBuilder =
+    appendHex(new StringBuilder(10).append("\\U"), codePoint, 8)
+
+  /** Returns the four-digit escape `\``uXXXX` for the char `c`. */
+  def hex(c: Char): StringBuilder =
+    appendHex(new StringBuilder(6).append("\\u"), c, 4)
+
+  /**
+   * Returns the eight-digit escape `\``UXXXXXXXX` for the char `c`. A Char never
+   * exceeds 0xFFFF, so the first four digits are always zero.
+   */
+  def hexLong(c: Char): StringBuilder = longEscape(c)
+
+  /**
+   * Escapes `str` for use inside the quotes of an N-Triples literal.
+   *
+   * The string is walked code point by code point rather than char by char, so
+   * a character above U+FFFF (stored as a surrogate pair) becomes one
+   * eight-digit escape instead of two surrogate escapes. An unpaired surrogate
+   * is still written as its own four-digit escape.
+   *
+   * @param str the raw lexical form
+   * @return the escaped text, without surrounding quotes
+   */
+  def toLiteral(str: String): String = {
+    val b = new StringBuilder
+    var i = 0
+    while (i < str.length) {
+      val cp = str.codePointAt(i)
+      cp match {
+        case 0x9 => b.append("\\t")
+        case 0xA => b.append("\\n")
+        case 0xD => b.append("\\r")
+        case 0x22 => b.append('\\').append('"')
+        case 0x5C => b.append('\\').append('\\')
+        // remaining printable ASCII is copied through unchanged
+        case _ if cp >= 0x20 && cp <= 0x7E => b.append(cp.toChar)
+        // other control characters and everything else up to U+FFFF
+        case _ if cp <= 0xFFFF => b.append(hex(cp.toChar))
+        case _ => b.append(longEscape(cp))
+      }
+      // step over both halves of a surrogate pair at once
+      i += Character.charCount(cp)
+    }
+    b.toString()
+  }
+  
+}
--- a/aRDF/project/build.scala	Sun Feb 05 21:37:53 2012 -0500
+++ b/aRDF/project/build.scala	Sun Feb 05 23:54:42 2012 -0500
@@ -66,7 +66,13 @@
     id = "jena",
     base = file("jena"),
     settings = buildSettings ++ jenaDeps ++ testDeps
-  ) dependsOn (rdfModel, graphIsomorphism, transformer)
+  ) dependsOn (rdfModel, graphIsomorphism, transformer, nTriplesParser)
 
+  // sbt sub-project for the N-Triples parser, rooted at the n-triples-parser directory
+  lazy val nTriplesParser =
+    Project(
+      id = "n-triples-parser",
+      base = file("n-triples-parser"),
+      settings = buildSettings ++ jenaDeps ++ testDeps
+    ).dependsOn(rdfModel)
+  
 }