temp commit after a lot of refactoring + documentation
author     Alexandre Bertails <bertails@gmail.com>
date       Sat, 12 Feb 2011 22:48:05 -0500
branch     no-hierarchy
changeset  337 36f6fb9e06fa
parent     336 8d846f38b2ca
child      338 bcff22b5561a
directmapping/src/main/scala/DirectMapping.scala
rdb/src/main/scala/RDB.scala
sql/src/main/scala/SQL.scala
--- a/directmapping/src/main/scala/DirectMapping.scala	Sat Feb 12 18:33:02 2011 -0500
+++ b/directmapping/src/main/scala/DirectMapping.scala	Sat Feb 12 22:48:05 2011 -0500
@@ -8,9 +8,16 @@
 
   lazy val DirectMapping = new DirectMapping {  }
 
+  /**
+   * The mapping functions implementing
+   * <http://www.w3.org/2001/sw/rdb2rdf/directGraph/>
+   */
   trait DirectMapping {
 
-    // should be done by BNode
+    /**
+     * counter making the BNode generation predictable; it is reset in databaseSemantics
+     * the RDF module should provide this functionality at some point
+     */
     private var NextBNode = 97
     def freshbnode () : BNode = {
       val ret = NextBNode
@@ -18,16 +25,33 @@
       BNode(ret.toChar.toString)
     }
 
-    // equivalent to RelName -> CandidateKey -> List[CellValue] -> Node
+    /**
+     * given:
+     *   - rn a RelName
+     *   - ck a CandidateKey <- ck must be a candidate key of the relation named rn
+     *   - vs a list of values <- vs should correspond to the values for ck
+     * a function of type NodeMap maps these elements to a unique RDF node
+     * its type is equivalent to the following curried function:
+     *   RelName -> CandidateKey -> List[CellValue] -> Node
+     * it verifies the following axiom:
+     *   ∀ r:Relation, ∀ ck:CandidateKey, ck ∊ r, ∀ t1:Tuple, ∀ t2:Tuple,
+     *   t1 ≠ t2 -> l1 = t1(ck) -> l2 = t2(ck) -> l1 ≠ l2 -> nodemap(r)(ck)(l1) ≠ nodemap(r)(ck)(l2)
+     */
     type NodeMap = PartialFunction[RelName, KeyMap]
 
+    /**
+     * dbToNodeMap builds the NodeMap making the tuple Nodes accessible through their candidate keys
+     * it is defined only for the indexable relations, as we need at least one candidate key
+     */
     def dbToNodeMap(db:Database):NodeMap =
       db.indexables map { rn => rn -> keyMapForRelation(db(rn)) } toMap
 
     /**
-     * A KeyMap associates the candidate key and key values with the
-     * node for any tuple in a unique relation.
-     * CandidateKey -> List[CellValue] -> Node
+     * given:
+     *   - ck a CandidateKey <- all the cks are expected to come from the same relation
+     *   - vs a list of values <- vs should correspond to the values for ck
+     * a function of type KeyMap maps these elements to a unique RDF node
+     *   CandidateKey -> List[CellValue] -> Node
      */
     type KeyMap = PartialFunction[CandidateKey, PartialFunction[List[CellValue], Node]]
 
@@ -47,21 +71,22 @@
         } }  
       }
       val m = Map[CandidateKey, Map[List[CellValue], Node]]()
-      val tuples = r.body map { t => tupleToNodeIRI(t, r) }
+      val tuples = r.body map { t => tupleToNode(t, r) }
       tuples.foldLeft(m){ case (m, (pairs, node)) => ++(m, pairs, node) }
     }
     
     /**
-     * The mapping functions implementing
-     * <http://www.w3.org/2001/sw/rdb2rdf/directGraph/>
+     * maps a tuple to a unique RDF node, to be accessible through its candidate keys
+     * the node is:
+     * * if there is a primary key: the constant IRI based on the actual values from the pk
+     * * if no primary key: a fresh bnode
      */
-      
-    def tupleToNodeIRI (t:Tuple, r:Relation) : (List[(CandidateKey, List[CellValue])], Node) = {
+    def tupleToNode (t:Tuple, r:Relation) : (List[(CandidateKey, List[CellValue])], Node) = {
       val s:Node =
         r.pk match {
           case Some(pk) =>
             /** Table has a primkary key. */	    
-            NodeIRI(iri(r, pk, t.notNullLexvalues(pk)))
+            NodeIRI(iri(r, pk, t.lexvalues(pk)))
           case None =>
             /** Table has no primkary key (but has some candidate keys). */
             NodeBNode(freshbnode())
@@ -69,44 +94,55 @@
       (r.candidates map { k => (k, k.attrs map { t(_) }) }, s)
     }
   
-    /** The triples-generating functions start with databasemap: */
-    def databaseSemantics (db:Database) : Graph = {
+    /**
+     * Main function expressing the RDF semantics of a SQL database
+     */
+    def databaseSemantics(db:Database):Graph = {
       NextBNode = 97
-      val nodeMap = dbToNodeMap(db)
-      Graph(db.keySet flatMap  { (rn:RelName) => relationSemantics(db(rn), nodeMap, db) })
+      val nodemap = dbToNodeMap(db)
+      Graph(db.relNames flatMap { rn:RelName => relationSemantics(db, nodemap, db(rn)) })
     }
   
-    def relationSemantics (r:Relation, nodes:NodeMap, db:Database) : Graph =
+    def relationSemantics(db:Database, nodemap:NodeMap, r:Relation):Graph =
       /* flatMap.toSet assumes that no two triples from directT would be the same.
        * We know this because relations with candidate keys are mapped to unique
        * subjects, and potentially redundant rows get unique blank node subjects.
        */
-      Graph(r.body flatMap { t => tupleSemantics(t, r, nodes, db) })
+      Graph(r.body flatMap { t => tupleSemantics(db, nodemap, r, t) })
   
-    def tupleSemantics (t:Tuple, r:Relation, nodes:NodeMap, db:Database) : Set[Triple] = {
+    def tupleSemantics (db:Database, nodemap:NodeMap, r:Relation, t:Tuple):Set[Triple] = {
       val s:Node =
+	// look for the first candidate key if available
         r.candidates.headOption match {
-          // Known to have at least one key, so take the first one.
+	  // if there is a candidate key, we know we can retrieve the mapped node
+	  // null values are ok at that point
           case Some(firstKey) => {
-            val vs = t.notNullLexvalues(firstKey)
-            nodes(r.name)(firstKey)(vs)
+            val cellvalues = t.cellvalues(firstKey)
+            nodemap(r.name)(firstKey)(cellvalues)
           }
-          /** Table has no candidate keys. */
+          // there is no candidate key, we have to come up with a new bnode
           case None =>
             NodeBNode(freshbnode())  
         }
-      val refs = references(t, r)  map { referenceSemantics(s, _, r, t, nodes) }
-      val scals = scalars(t, r) flatMap { lexicalValueSemantics(r.name, s, _, r.header, t) }
+      // the foreign keys create triples
+      val triplesFromFKs = t.references(r) map { referenceSemantics(s, _, r, t, nodemap) }
+      // the lexical values (i.e. the non-null values) also create triples
+      val triplesFromLexicalValues = t.scalars(r) flatMap { lexicalValueSemantics(r, s, _, t) }
+      // the relation provenance is mapped to RDF type information, computed from the relation itself
       val triple = Triple(SubjectNode(s),
 			  PredicateIRI(IRI("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")),
 			  ObjectNode(NodeIRI(IRI(UE(r)))))
-      refs ++ scals + triple
+      triplesFromFKs ++ triplesFromLexicalValues + triple
     }
-    
-    def lexicalValueSemantics (rn:RelName, s:Node, a:AttrName, h:Header, t:Tuple) : Option[Triple] = {
-      val p = predicateSemantics (rn, new AttrList { val attrs = List(a) } )
+
+    /**
+     * builds the triple for the (non-null) cell value of attribute a: predicate from r and a, object from the lexical value and the declared datatype of a
+     */
+    def lexicalValueSemantics(r:Relation, s:Node, a:AttrName, t:Tuple):Option[Triple] = {
+      // a is implicitly promoted to an AttrList
+      val p = predicateSemantics(r, a)
       val cellValue = t(a)
-      val datatype = h.datatype(a)
+      val datatype = r.header(a)
       (cellValue, datatype)  match {
 	case (LexicalValue(l), Datatype.STRING) => {
 	  val o = PlainLiteral(l, None)
@@ -121,21 +157,21 @@
     }
 
     def referenceSemantics (s:Node, fk:ForeignKey, r:Relation, t:Tuple, nodes:NodeMap) : Triple = {
-      val p = predicateSemantics (r.name, fk)
-      val ls:List[LexicalValue] = t.notNullLexvalues(fk)
+      val p = predicateSemantics (r, fk)
+      val cellvalues:List[CellValue] = t.cellvalues(fk)
       val ForeignKey(as, Target(rel, key)) = fk
       if (!(nodes isDefinedAt rel))
         error("No referent relation \"" + rel + "\" to match " + r.name + t)
       if (!(nodes(rel) isDefinedAt key))
         error("Relation " + rel + " has no attributes (" + key + ") to match " + r.name + t)
-      if (!(nodes(rel)(key) isDefinedAt ls))
-        error("Relation " + rel + "(" + key + ") has no values " + ls + " to match " + r.name + t)
-      val o:Object = ObjectNode(nodes(rel)(key)(ls))
+      if (!(nodes(rel)(key) isDefinedAt cellvalues))
+        error("Relation " + rel + "(" + key + ") has no values " + cellvalues + " to match " + r.name + t)
+      val o:Object = ObjectNode(nodes(rel)(key)(cellvalues))
        Triple(SubjectNode(s), PredicateIRI(p), o)
      }
-  
-    def predicateSemantics (rn:RelName, as:AttrList) : IRI =
-      IRI(UE(rn) + "#" + as.attrs.mkString("_"))
+
+    def predicateSemantics (r:Relation, as:AttrList) : IRI =
+      IRI(UE(r) + "#" + as.attrs.mkString("_"))
 
     // TODO: aren't they already part of the RDF model?
     def datatypeSemantics (d:Datatype) : IRI =
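
As an illustration of the refactored API, here is a minimal sketch exercising databaseSemantics end to end. It assumes the constructors and signatures shown in this changeset; the import paths and the name of the object holding the DirectMapping instance are hypothetical, since they are not visible in these hunks.

    // minimal sketch, assuming the RDB and DirectMapping definitions from this changeset;
    // the import paths below are hypothetical
    import RDB._
    import DirectMappingModule.DirectMapping   // hypothetical enclosing object

    // one relation with a primary key and a single tuple
    val people =
      Relation(RelName("People"),
               Header("ID" -> Datatype.STRING, "fname" -> Datatype.STRING),
               List(Tuple("ID" -> LexicalValue("7"), "fname" -> LexicalValue("Bob"))),
               List(CandidateKey("ID")),   // candidate keys
               Some(CandidateKey("ID")),   // pk, also materialized among the candidates
               ForeignKeys())              // no foreign keys

    val db = Database(people)

    // NodeMap: RelName -> CandidateKey -> List[CellValue] -> Node
    val nodemap = DirectMapping.dbToNodeMap(db)

    // the direct graph for the whole database
    val graph = DirectMapping.databaseSemantics(db)
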
--- a/rdb/src/main/scala/RDB.scala	Sat Feb 12 18:33:02 2011 -0500
+++ b/rdb/src/main/scala/RDB.scala	Sat Feb 12 22:48:05 2011 -0500
@@ -5,68 +5,153 @@
  */
 object RDB {
 
-  case class Database (m:Map[RelName, Relation]) {
-    def apply (rn:RelName) = m(rn)
-    def keySet = m.keySet.toSet
+  /**
+   * a Database maps a relation name to the actual Relation
+   */
+  case class Database(private val m:Map[RelName, Relation]) extends PartialFunction[RelName, Relation] {
+    def apply(rn:RelName) = m(rn)
+    def isDefinedAt(rn:RelName) = m isDefinedAt rn
+    /** returns all the relation names */
+    def relNames:Set[RelName] = m.keySet.toSet
+    /** returns all the relations with at least one candidate key */
     def indexables = m collect { case (rn, r) if r.isIndexable => rn }
   }
+
   object Database {
     def apply (l:Relation*):Database =
       Database(l map { r => (r.name -> r) } toMap)
   }
 
-  case class Relation (name:RelName, header:Header, body:List[Tuple], candidates:List[CandidateKey], pk:Option[CandidateKey], fks:ForeignKeys) {
-    // TODO: should be + instead of ++
-    def ++ (t:Tuple):Relation = this.copy(body = body :+ t)
+  /**
+   * definition of a Relation
+   * pk should be seen as the function extracting the unique Primary Key from candidates (if present)
+   * the current SQL parser ensures that the pk, when present, is also materialized among the candidates
+   */
+  case class Relation(name:RelName,
+		      header:Header,
+		      body:List[Tuple],
+		      candidates:List[CandidateKey],
+		      pk:Option[CandidateKey],
+		      fks:ForeignKeys) {
+    /** adds a tuple to the body of the relation */
+    def +(t:Tuple):Relation = this.copy(body = body :+ t)
+    /** a relation is indexable if it has at least one candidate key */
     def isIndexable:Boolean = candidates.nonEmpty
   }
 
-  case class Header (m:Map[AttrName, Datatype]) {
-    def apply (a:AttrName) = m(a)
-    def keySet = m.keySet.toSet
-    def datatype (a:AttrName) : Datatype = m(a)
-    def contains (a:AttrName) : Boolean = m contains a
+  case class RelName(n:String) {
+    override def toString = n
   }
+
+  /**
+   * a Header maps an attribute name to a SQL datatype
+   * the set of attribute names can be extracted (it is genuinely unordered)
+   */
+  case class Header(private val m:Map[AttrName, Datatype]) extends PartialFunction[AttrName, Datatype] {
+    def apply(a:AttrName) = m(a)
+    def isDefinedAt(a:AttrName) = m isDefinedAt a
+    def attrNames:Set[AttrName] = m.keySet.toSet
+  }
+
   object Header {
     def apply (s:(String, Datatype)*):Header =
       Header(s map { case (name, datatype) => (AttrName(name), datatype) } toMap)
   }
 
+  /**
+   * a Tuple maps an attribute name to a value in a cell
+   * the tuple does not carry the header information, so when it is needed
+   * it has to be retrieved from the relation
+   * in particular, the order of the attribute names is not known within the tuple
+   */
+  case class Tuple (private val m:Map[AttrName, CellValue]) extends PartialFunction[AttrName, CellValue] {
+    def apply (a:AttrName) = m(a)
+
+    def isDefinedAt(a:AttrName) = m isDefinedAt a
+
+    /**
+     * returns all the lexical values corresponding to the given attributes
+     * the order from the list is preserved
+     * we assume the corresponding cells are non-null
+     * in particular, it is safe to call it with a primary key
+     */
+    def lexvalues (as:AttrList /* forall a in as, a is in this Tuple */):List[LexicalValue] =
+      as.attrs map {
+        m(_) match {
+          case lexicalValue @ LexicalValue(_) => lexicalValue
+          case ␀ => error("this value MUST not be null")
+        }
+      }
+
+    def cellvalues (as:AttrList /* forall a in as, a is in this Tuple */):List[CellValue] =
+      as.attrs map { m(_) }
+
+    def references(r:Relation):Set[ForeignKey] = {
+      val nullAttributes:Set[AttrName] =
+	m collect { case (attrName, cellValue) if cellValue == ␀ => attrName } toSet
+
+      r.fks filter { case ForeignKey(as, _) => nullAttributes & as.toSet isEmpty  }
+    }
+
+    def scalars(r:Relation):Set[AttrName] = {
+      val notNullAttributes:Set[AttrName] =
+	m collect { case (attrName, cellValue) if cellValue != ␀ => attrName } toSet
+
+      notNullAttributes filterNot { attrName => r.fks definesActuallyUnaryFK attrName }
+    }
+  }
+
+  object Tuple {
+    def apply (s:(String, CellValue)*):Tuple =
+      Tuple(s map { case (name, cellValue) => (AttrName(name), cellValue) } toMap)
+  }
+
+  abstract class CellValue
+  case class LexicalValue(s:String) extends CellValue
+  case object ␀ extends CellValue
+
+  case class ForeignKeys (private val fks:Set[ForeignKey]) {
+    def has(as:AttrList) = fks exists { _.attrs == as }
+    def getFK(as:AttrList):ForeignKey = fks find { _.attrs == as } get
+    def unaryFKs:Set[ForeignKey] = fks filter { _.isUnary }
+    def definesActuallyUnaryFK(a:AttrName):Boolean = unaryFKs exists { _.attrs contains a }
+    def filter(p: ForeignKey => Boolean):Set[ForeignKey] = fks filter p
+  }
+
+  object ForeignKeys {
+    def apply (fks:(List[String], Target)*):ForeignKeys =
+      ForeignKeys(fks map { case (keys, target) => ForeignKey(keys map { AttrName(_) }, target)} toSet)
+  }
+
+  case class ForeignKey(attrs:List[AttrName], target:Target) extends AttrList
+
+  case class Target(rel:RelName, key:CandidateKey)
+
+  case class CandidateKey (attrs:List[AttrName]) extends AttrList
+
+  object CandidateKey {
+    def apply (l:String*):CandidateKey =
+      CandidateKey(l map { AttrName(_) } toList)
+  }
+
   trait AttrList {
     val attrs:List[AttrName]
     def isUnary:Boolean = attrs.length == 1
   }
 
-  case class ForeignKey (attrs:List[AttrName], target:Target) extends AttrList {
-    def toSet = attrs.toSet
+  object AttrList {
+    // it's always ok to automatically promote a single AttrName to an AttrList
+    implicit def promoteAttrsAsAttrList(a:AttrName) = new AttrList { val attrs = List[AttrName](a) }
   }
 
-  case class CandidateKey (attrs:List[AttrName]) extends AttrList
-  object CandidateKey {
-    def apply (l:String*):CandidateKey =
-      CandidateKey(l.toList map { AttrName(_) })
+  case class AttrName(n:String) {
+    override def toString = n
   }
-  implicit def cc2list (cc:CandidateKey) = cc.attrs
-
-  case class ForeignKeys (fks:Set[ForeignKey]) {
-    def has(as:AttrList) = fks exists { _.attrs == as }
-    def getFK(as:AttrList):ForeignKey = fks find { _.attrs == as } get
-    def unaryFKs:Set[ForeignKey] = fks filter { _.isUnary }
-    def definesActuallyUnaryFK(a:AttrName):Boolean = unaryFKs exists { _.attrs contains a }
-    def filter(p: ForeignKey => Boolean):Set[ForeignKey] = fks filter p
-    def -(as:AttrList):ForeignKeys = ForeignKeys(fks filter { _.attrs == as })
-  }
-
-  object ForeignKeys {
-    def apply (fks:(List[String], Target)*):ForeignKeys =
-      ForeignKeys(fks map { case (keys, target) => ForeignKey(keys map { AttrName(_) }, target)} toSet)
-  }
-
-  case class Target (rel:RelName, key:CandidateKey)
 
   case class Datatype(name:String) {
     override def toString = "/* " + name + " */"
   }
+
   object Datatype {
     val CHAR = Datatype("Char")
     val VARCHAR = Datatype("Varchar")
@@ -80,53 +165,4 @@
     val DATETIME = Datatype("Datetime")
   }
 
-  // case class Tuple (m:Map[AttrName, CellValue]) {
-  //   def apply (a:AttrName) = m(a)
-  //   def lexvalue (a:AttrName) : CellValue = m(a)
-  //   def lexvaluesNoNulls (as:List[AttrName]) = as map { m(_).asInstanceOf[LexicalValue] }
-  //   def nullAttributes (h:Header) : Set[AttrName] = h.keySet filter { m(_) == ␀ }
-  // }
-  case class Tuple (private val m:Map[AttrName, CellValue]) extends PartialFunction[AttrName, CellValue] {
-    def apply (a:AttrName) = m(a)
-    def isDefinedAt(a:AttrName) = m isDefinedAt a
-    // assumes that the AttrName does not correspond to null values
-    // for example, it's ok to call it with PK attributes
-    // { as | forall a in as, a in Tuple }
-    def notNullLexvalues (as:AttrList):List[LexicalValue] =
-      as.attrs map {
-        m(_) match {
-          case lexicalValue @ LexicalValue(_) => lexicalValue
-          case ␀ => error("this value MUST not be null")
-        }
-      }
-    def nullAttributes : Set[AttrName] = m collect { case (attrName, cellValue) if cellValue == ␀ => attrName } toSet
-    def notNullAttributes : Set[AttrName] = m collect { case (attrName, cellValue) if cellValue != ␀ => attrName } toSet
-  }
-  object Tuple {
-    def apply (s:(String, CellValue)*):Tuple =
-      Tuple(s map { case (name, cellValue) => (AttrName(name), cellValue) } toMap)
-  }
-
-  abstract class CellValue
-  case class LexicalValue (s:String) extends CellValue
-  case object ␀ extends CellValue
-
-  case class RelName(n:String) {
-    override def toString = n
-  }
-  case class AttrName(n:String) {
-    override def toString = n
-  }
-
-  def references (t:Tuple, r:Relation):Set[ForeignKey] = {
-    val nulls:Set[AttrName] = t.nullAttributes
-    val references = r.fks filter { case ForeignKey(as, _) => nulls & as.toSet isEmpty  }
-    references
-  }
-
-  def scalars (t:Tuple, r:Relation):Set[AttrName] = {
-    val notNulls:Set[AttrName] = t.notNullAttributes
-    notNulls filterNot { attrName => r.fks definesActuallyUnaryFK attrName }
-  }
-
 }
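
Under the same hypothetical import assumptions, a short sketch of the behaviour of the new Tuple.references and Tuple.scalars methods: a tuple whose foreign-key cell is null drops that reference, and the remaining non-null, non-FK attributes are the scalars.

    // sketch only; People and Addresses are made-up relations reusing the constructors above
    val peopleWithFK =
      Relation(RelName("People"),
               Header("ID" -> Datatype.STRING, "fname" -> Datatype.STRING, "addr" -> Datatype.STRING),
               List(),
               List(CandidateKey("ID")),
               Some(CandidateKey("ID")),
               ForeignKeys((List("addr"), Target(RelName("Addresses"), CandidateKey("ID")))))

    val bob = Tuple("ID" -> LexicalValue("7"), "fname" -> LexicalValue("Bob"), "addr" -> ␀)

    bob.references(peopleWithFK)   // Set()           : the addr FK is dropped because addr is null
    bob.scalars(peopleWithFK)      // Set(ID, fname)  : addr is excluded (null), and is a unary FK anyway
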
--- a/sql/src/main/scala/SQL.scala	Sat Feb 12 18:33:02 2011 -0500
+++ b/sql/src/main/scala/SQL.scala	Sat Feb 12 22:48:05 2011 -0500
@@ -196,7 +196,7 @@
     case l~x => RDB.Database(l.foldLeft(Map[RDB.RelName, RDB.Relation]())((m, p) => {
       p match {
 	case Create(rn:RDB.RelName, relation:RDB.Relation) => m + (rn -> relation)
-	case Insert(rn:RDB.RelName, tuple:RDB.Tuple) => m + (rn -> (m(rn) ++ tuple)) // add the tuple
+	case Insert(rn:RDB.RelName, tuple:RDB.Tuple) => m + (rn -> (m(rn) + tuple)) // add the tuple
       }
     }))
   }
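
Finally, a sketch of what the one-character fix in the Insert case above relies on: Relation.+ (renamed from ++ in this changeset) appends the tuple to the relation body and copies everything else.

    // sketch, reusing the hypothetical peopleWithFK relation from above
    val sue = Tuple("ID" -> LexicalValue("8"), "fname" -> LexicalValue("Sue"), "addr" -> ␀)
    val updated = peopleWithFK + sue
    updated.body.length   // 1 : sue has been appended to the (previously empty) body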