changeset 22:06b795e0c753

+ pass to create subject nodes + foreign keys to non-primary keys
author Eric Prud'hommeaux <eric@w3.org>
date Mon, 27 Sep 2010 08:30:16 -0400
parents 402e3f761b4d
children 40b955a81d55
files src/main/scala/Main.scala src/test/scala/Test.scala
diffstat 2 files changed, 130 insertions(+), 22 deletions(-) [+]
line wrap: on
line diff
--- a/src/main/scala/Main.scala	Sun Sep 26 15:54:09 2010 -0400
+++ b/src/main/scala/Main.scala	Mon Sep 27 08:30:16 2010 -0400
@@ -7,13 +7,13 @@
 
   case class Database( m:Map[RelName, Relation] )
   case class Relation ( header:Header, body:Body )
-  case class Header (types:Map[AttrName, SQLDatatype], pk:PrimaryKey, fks:ForeignKeys) {
+  case class Header (types:Map[AttrName, SQLDatatype], keys:List[CandidateKey], pk:CandidateKey, fks:ForeignKeys) {
     def keySet () = types.keySet
   }
 
-  type PrimaryKey = List[AttrName]
+  type CandidateKey = List[AttrName]
   type ForeignKeys = Map[List[AttrName], Target]
-  case class Target(rel:RelName, attrs:List[AttrName])
+  case class Target(rel:RelName, attrs:CandidateKey)
 
   type Body = Set[Tuple]
 
@@ -52,24 +52,29 @@
   case class Triple (s:Subject, p:IRI, o:Object)
 
   sealed abstract class Subject
-  case class SubjectIRI(i:IRI) extends Subject
-  implicit def iri2subjectiri(i:IRI):Subject = SubjectIRI(i)
-  case class SubjectBNode(b:BNode) extends Subject
-  implicit def bnode2subjectbnode(b:BNode):Subject = SubjectBNode(b)
+  case class SubjectNode(n:Node) extends Subject
+  implicit def node2subjectnode(n:Node):Subject = SubjectNode(n)
+  implicit def iri2subjectnode(i:IRI):Subject = SubjectNode(i)
+  implicit def bnode2subjectnode(b:BNode):Subject = SubjectNode(b)
 
   sealed abstract class Predicate
   case class PredicateIRI(i:IRI) extends Predicate
   implicit def iri2predicateiri(i:IRI):Predicate = PredicateIRI(i)
 
   sealed abstract class Object
-  case class ObjectIRI(i:IRI) extends Object
-  implicit def iri2objectiri(i:IRI):Object = ObjectIRI(i)
-  case class ObjectBNode(b:BNode) extends Object
-  implicit def bnode2objectbnode(b:BNode):Object = ObjectBNode(b)
-  case class ObjectLiteral (l:Literal) extends Object
+  case class ObjectNode(n:Node) extends Object
+  implicit def node2objectnode(n:Node):Object = ObjectNode(n)
+  implicit def iri2objectnode(i:IRI):Object = ObjectNode(i)
+  implicit def bnode2objectnode(b:BNode):Object = ObjectNode(b)
+  case class ObjectLiteral (n:Literal) extends Object
 
+
+  sealed abstract class Node
+  case class NodeIRI(i:IRI) extends Node
+  implicit def iri2nodeiri(i:IRI):Node = NodeIRI(i)
+  case class NodeBNode(b:BNode) extends Node
+  implicit def bnode2nodebnode(b:BNode):Node = NodeBNode(b)
   case class IRI(iri:String)
-
   case class BNode(label:String)
 
   sealed abstract class Literal
@@ -89,6 +94,9 @@
   import RDF._
   import SQL._
 
+  type KeyMap = Map[CandidateKey, Map[List[CellValue], Node]]
+  type NodeMap = Map[RelName, KeyMap]
+
   // Transformation argument:
   case class StemIRI(stem:String) {
     def +(path:String):IRI = IRI(stem + path)
@@ -96,16 +104,107 @@
 
   // Mapping functions:
   def databasemap (u:StemIRI, db:Database) : RDFGraph = {
-    db.m.flatMap{case(rn, r) => relationmap(u, rn, r)}.toSet
+    val idxables:Set[RelName] = db.m.keySet.filter(rn => db.m(rn).header.keys.size > 0)
+    val nodes:NodeMap = idxables.map(rn => rn -> relation2subject(u, rn, db.m(rn))).toMap
+    db.m.keySet.flatMap(rn => relationmap(u, rn, db.m(rn), nodes)).toSet
   }
 
-  def relationmap (u:StemIRI, rn:RelName, r:Relation) : RDFGraph =
-    body(r).flatMap(t => tuplemap(u, rn, t, r))
+  def relation2subject (u:StemIRI, rn:RelName, r:Relation) : KeyMap = {
+    val l = List((List("KeyA", "KeyB"), List(10, 11), 1), (List("KeyA", "KeyB"), List(20, 21), 2))
+    val g = Map("KeyA" -> Map("ValA1" -> 1, "ValA2" -> 2), "KeyB" -> Map("ValB1" -> 1, "ValB2" -> 2))
+    l.foldLeft(Map[String, Map[Int, Int]]())((m, t) => {
+      val pairs = t._1.zip(t._2)
+      pairs.foldLeft(m)((m, p) => {
+	if (m.get(p._1).isDefined) {
+	  val byKey = m(p._1)
+	  if (byKey.get(p._2).isDefined) {
+	    error("tried to set " + rn + p._1 + p._2 + " = " + t._3 + "(was " + byKey(p._2) + ")")
+	  } else {
+	    val im1 = byKey ++ Map[Int, Int](p._2 -> t._3)
+	    m ++ Map[String, Map[Int, Int]](p._1 -> im1)
+	  }
+	} else {
+	  m ++ Map(p._1 -> Map(p._2 -> t._3))
+	}
+      })
+    })
 
-  def tuplemap (u:StemIRI, rn:RelName, t:Tuple, r:Relation) : Set[Triple] = {
+    val ck1:CandidateKey = List("name", "ssn")
+    val ck2:CandidateKey = List("ID")
+    val v11:List[CellValue] = List(LexicalValue("bob"), LexicalValue("123"))
+    val v21:List[CellValue] = List(LexicalValue("alice"), LexicalValue("8"))
+    val v12:List[CellValue] = List(LexicalValue("18"))
+    val v22:List[CellValue] = List(LexicalValue("23"))
+    val s1:Node = BNode("1")
+    val s2:Node = BNode("2")
+    val data:Set[(List[(CandidateKey, List[CellValue])], Node)] =
+      Set((List((ck1, v11),(ck2, v21)), s1),
+	  (List((ck1, v12),(ck2, v22)), s2))
+    val test = data.foldLeft(Map[CandidateKey, Map[List[CellValue], Node]]())((m, t) => {
+      val pairs = t._1
+      pairs.foldLeft(m)((m, p) => {
+	if (m.get(p._1).isDefined) {
+	  val byKey = m(p._1)
+	  if (byKey.get(p._2).isDefined) {
+	    error("tried to set " + rn + p._1 + p._2 + " = " + t._2 + "(was " + byKey(p._2) + ")")
+	  } else {
+	    val im1 = byKey ++ Map[List[CellValue], Node](p._2 -> t._2)
+	    m ++ Map[CandidateKey, Map[List[CellValue], Node]](p._1 -> im1)
+	  }
+	} else {
+	  m ++ Map[CandidateKey, Map[List[CellValue], Node]](p._1 -> Map(p._2 -> t._2))
+	}
+      })
+    })
+
+    val goal:Map[CandidateKey, Map[List[CellValue], Node]] =
+      Map(ck1 -> Map(v11 -> s1,
+      		     v12 -> s2),
+      	  ck2 -> Map(v21 -> s1,
+      		     v22 -> s2))
+    assert(goal == test)
+
+    val data2:Set[(List[(CandidateKey, List[CellValue])], Node)] = body(r).map(t => {
+      // (List(List("name", "ssn"), List("ID")), List(List("bob", 123), List(18)), 1)
+      // (List(List("name", "ssn"), List("ID")), List(List("alice", 8), List(23)), 2)
+      tuple2subject(u, rn, t, r)
+    })
+    data2.foldLeft(Map[CandidateKey, Map[List[CellValue], Node]]())((m, t) => {
+      val pairs = t._1
+      pairs.foldLeft(m)((m, p) => {
+	if (m.get(p._1).isDefined) {
+	  val byKey = m(p._1)
+	  if (byKey.get(p._2).isDefined) {
+	    error("tried to set " + rn + p._1 + p._2 + " = " + t._2 + "(was " + byKey(p._2) + ")")
+	  } else {
+	    val im1 = byKey ++ Map[List[CellValue], Node](p._2 -> t._2)
+	    m ++ Map[CandidateKey, Map[List[CellValue], Node]](p._1 -> im1)
+	  }
+	} else {
+	  m ++ Map[CandidateKey, Map[List[CellValue], Node]](p._1 -> Map(p._2 -> t._2))
+	}
+      })
+    })
+
+  }
+
+  def tuple2subject (u:StemIRI, rn:RelName, t:Tuple, r:Relation) : (List[(CandidateKey, List[CellValue])], Node) = {
     val h = header(r)
     val vs = pk(h).map(k => lexvalue(h, t, k).asInstanceOf[LexicalValue])
-    val s = nodemap(u, rn, pk(h), vs) // Assume: no NULLs in primary key
+    val s:Node = if (pk(h).length == 0) freshbnode() else nodemap(u, rn, pk(h), vs) // Assume: no NULLs in primary key
+    (h.keys.map(k => {
+      val values:List[CellValue] = k.map(a => t(a))
+      (k, values)
+    }), s)
+  }
+
+  def relationmap (u:StemIRI, rn:RelName, r:Relation, nodes:NodeMap) : RDFGraph =
+    body(r).flatMap(t => tuplemap(u, rn, t, r, nodes))
+
+  def tuplemap (u:StemIRI, rn:RelName, t:Tuple, r:Relation, nodes:NodeMap) : Set[Triple] = {
+    val h = header(r)
+    val vs = pk(h).map(k => lexvalue(h, t, k).asInstanceOf[LexicalValue])
+    val s:Node = if (nodes.get(rn).isDefined) nodes(rn)(pk(h))(vs) else freshbnode()
 
     val allAttrs:Set[AttrName] = h.keySet
     val allFKs:Set[List[AttrName]] = h.fks.keySet
@@ -121,21 +220,24 @@
     val referencelist = h.fks.keySet -- nullFKs
 
     scalarlist.map(a => scalartriples(u, rn, s, a, h, t)) ++
-    referencelist.map(as => referencetriples(u, rn, s, as, h, t))
+    referencelist.map(as => referencetriples(u, rn, s, as, h, t, nodes))
 
   }
 
-  def scalartriples (u:StemIRI, rn:RelName, s:IRI, a:AttrName, h:Header, t:Tuple) : Triple = {
+  def freshbnode () : Node = BNode("999")
+
+  def scalartriples (u:StemIRI, rn:RelName, s:Node, a:AttrName, h:Header, t:Tuple) : Triple = {
     val p = predicatemap (u, rn, List(a))
     val l = t(a).asInstanceOf[LexicalValue]
     val o = literalmap(l, sqlDatatype(h, a))
     Triple(s, p, o)
   }
-  def referencetriples (u:StemIRI, rn:RelName, s:IRI, as:List[AttrName], h:Header, t:Tuple) : Triple = {
+  def referencetriples (u:StemIRI, rn:RelName, s:Node, as:List[AttrName], h:Header, t:Tuple, nodes:NodeMap) : Triple = {
     val p = predicatemap (u, rn, as)
     val ls:List[LexicalValue] = as.map(a =>t(a).asInstanceOf[LexicalValue])
     val target = h.fks(as)
-    val o = nodemap(u, target.rel, target.attrs, ls)
+    val o999 = nodemap(u, target.rel, target.attrs, ls)
+    val o:Object = nodes(target.rel)(target.attrs)(ls)
     Triple(s, p, o)
   }
 
--- a/src/test/scala/Test.scala	Sun Sep 26 15:54:09 2010 -0400
+++ b/src/test/scala/Test.scala	Mon Sep 27 08:30:16 2010 -0400
@@ -13,6 +13,7 @@
     val addresses = Relation(Header(Map("ID" -> SQLInt(),
 					"city" -> SQLString(),
 					"state" -> SQLString()),
+				    List(List("ID")),
 				    List("ID"),
 				    Map()),
 			     Set(Map("ID" -> LexicalValue("18"),
@@ -22,6 +23,7 @@
     val people = Relation(Header(Map("ID" -> SQLInt(),
 				     "fname" -> SQLString(),
 				     "addr" -> SQLInt()),
+				 List(List("ID")),
 				 List("ID"),
 				 Map(List("addr") -> Target("Addresses", List("ID")))),
 			  Set(Map("ID" -> LexicalValue("7"),
@@ -55,6 +57,7 @@
     val addresses = Relation(Header(Map("ID" -> SQLInt(),
 					"city" -> SQLString(),
 					"state" -> SQLString()),
+				    List(List("ID")),
 				    List("ID"),
 				    Map()),
 			     Set(Map("ID" -> LexicalValue("18"),
@@ -66,6 +69,7 @@
 				     "addr" -> SQLInt(),
 				     "deptName" -> SQLString(),
 				     "deptCity" -> SQLString()),
+				 List(List("ID")),
 				 List("ID"),
 				 Map(List("addr") -> Target("Addresses", List("ID")),
 				     List("deptName", "deptCity") -> Target("Department", List("name", "city")))),
@@ -84,6 +88,7 @@
 					"name" -> SQLString(),
 					"city" -> SQLString(),
 					"manager" -> SQLInt()),
+				    List(List("ID"), List("name", "city")),
 				    List("ID"),
 				    Map(List("manager") -> Target("People", List("ID")))),
 			     Set(Map("ID" -> LexicalValue("23"),
@@ -124,6 +129,7 @@
     val employees = Relation(Header(Map("ID" -> SQLInt(),
 					"fname" -> SQLString(),
 					"boss" -> SQLInt()),
+				    List(List("ID")),
 				    List("ID"),
 				    Map(List("boss") -> Target("Employees", List("ID")))),
 			     Set(Map("ID" -> LexicalValue("1"),
Set up and maintained by the W3C Systems Team. Please report bugs to sysreq@w3.org.

W3C would like to thank Microsoft, who donated the server that allows us to run this service.