--- a/directmapping/src/main/scala/DirectMapping.scala Sat Feb 12 18:33:02 2011 -0500
+++ b/directmapping/src/main/scala/DirectMapping.scala Sat Feb 12 22:48:05 2011 -0500
@@ -8,9 +8,16 @@
lazy val DirectMapping = new DirectMapping { }
+ /**
+ * The mapping functions implementing
+ * <http://www.w3.org/2001/sw/rdb2rdf/directGraph/>
+ */
trait DirectMapping {
- // should be done by BNode
+ /**
+   * trick to reset the BNode generation, so that the generated BNodes are predictable
+   * the RDF module should provide this functionality at some point
+ */
private var NextBNode = 97
def freshbnode () : BNode = {
val ret = NextBNode
@@ -18,16 +25,33 @@
BNode(ret.toChar.toString)
}
- // equivalent to RelName -> CandidateKey -> List[CellValue] -> Node
+ /**
+ * given:
+ * - rn a RelName
+   * - ck a CandidateKey <- ck must be a candidate key of the relation named rn
+   * - vs a list of values <- vs should correspond to the values for ck
+   * a function of type NodeMap maps these elements to a unique RDF node
+   * its type is equivalent to the following curried function:
+   *   RelName -> CandidateKey -> List[CellValue] -> Node
+   * it verifies the following axiom:
+   * ∀ r:Relation, ∀ ck:CandidateKey, ck ∊ r, ∀ t1:Tuple, ∀ t2:Tuple,
+   *   t1 ≠ t2 -> l1 = t1(ck) -> l2 = t2(ck) -> nodemap(r)(ck)(l1) ≠ nodemap(r)(ck)(l2)
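+   * for instance, for a nodemap derived from a relation "Addresses" keyed on "ID"
+   * (names here are purely illustrative), the curried lookup would be:
+   * {{{
+   * nodemap(RelName("Addresses"))(CandidateKey("ID"))(List(LexicalValue("18")))  // the Node minted for that tuple
+   * }}}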
+ */
type NodeMap = PartialFunction[RelName, KeyMap]
+ /**
+   * dbToNodeMap builds the NodeMap making each tuple's Node accessible through its candidate keys
+   * it is defined only for the indexable relations, as at least one candidate key is needed
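+   * (so, for a relation rn declared without any candidate key, the resulting map
+   * is simply not defined at rn)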
+ */
def dbToNodeMap(db:Database):NodeMap =
db.indexables map { rn => rn -> keyMapForRelation(db(rn)) } toMap
/**
- * A KeyMap associates the candidate key and key values with the
- * node for any tuple in a unique relation.
- * CandidateKey -> List[CellValue] -> Node
+ * given:
+   * - ck a CandidateKey <- all the cks are expected to come from the same relation
+ * - vs a list of values <- vs should correspond to the values for ck
+ * a function of type KeyMap maps these elements to a unique RDF node
+ * CandidateKey -> List[CellValue] -> Node
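+   * for instance (with illustrative names), keymap(CandidateKey("ID")) is itself a partial
+   * function mapping List(LexicalValue("18")) to the Node minted for the corresponding tuple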
*/
type KeyMap = PartialFunction[CandidateKey, PartialFunction[List[CellValue], Node]]
@@ -47,21 +71,22 @@
} }
}
val m = Map[CandidateKey, Map[List[CellValue], Node]]()
- val tuples = r.body map { t => tupleToNodeIRI(t, r) }
+ val tuples = r.body map { t => tupleToNode(t, r) }
tuples.foldLeft(m){ case (m, (pairs, node)) => ++(m, pairs, node) }
}
/**
- * The mapping functions implementing
- * <http://www.w3.org/2001/sw/rdb2rdf/directGraph/>
+   * maps a tuple to a unique RDF node, accessible through its candidate keys
+   * the node is:
+   * * if there is a primary key: a constant IRI built from the actual values of the pk
+ * * if no primary key: a fresh bnode
*/
-
- def tupleToNodeIRI (t:Tuple, r:Relation) : (List[(CandidateKey, List[CellValue])], Node) = {
+ def tupleToNode (t:Tuple, r:Relation) : (List[(CandidateKey, List[CellValue])], Node) = {
val s:Node =
r.pk match {
case Some(pk) =>
          /** Table has a primary key. */
- NodeIRI(iri(r, pk, t.notNullLexvalues(pk)))
+ NodeIRI(iri(r, pk, t.lexvalues(pk)))
case None =>
          /** Table has no primary key (but has some candidate keys). */
NodeBNode(freshbnode())
@@ -69,44 +94,55 @@
(r.candidates map { k => (k, k.attrs map { t(_) }) }, s)
}
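+  // e.g. for a relation whose only candidate key is ("ID") and a tuple whose ID cell is 18
+  // (illustrative names), this returns (List((CandidateKey("ID"), List(LexicalValue("18")))), node),
+  // where node is either the IRI minted from the primary key or a fresh bnode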
- /** The triples-generating functions start with databasemap: */
- def databaseSemantics (db:Database) : Graph = {
+ /**
+ * Main function expressing the RDF semantics of a SQL database
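+   * for instance, a toy database with a single keyed relation could be mapped as follows
+   * (a sketch with illustrative names, assuming RDB._ is in scope):
+   * {{{
+   * val addresses =
+   *   Relation(RelName("Addresses"),
+   *            Header("ID" -> Datatype("Int"), "city" -> Datatype.STRING),
+   *            List(Tuple("ID" -> LexicalValue("18"), "city" -> LexicalValue("Cambridge"))),
+   *            List(CandidateKey("ID")),
+   *            Some(CandidateKey("ID")),
+   *            ForeignKeys())
+   * val g:Graph = databaseSemantics(Database(addresses))
+   * }}}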
+ */
+ def databaseSemantics(db:Database):Graph = {
NextBNode = 97
- val nodeMap = dbToNodeMap(db)
- Graph(db.keySet flatMap { (rn:RelName) => relationSemantics(db(rn), nodeMap, db) })
+ val nodemap = dbToNodeMap(db)
+ Graph(db.relNames flatMap { rn:RelName => relationSemantics(db, nodemap, db(rn)) })
}
- def relationSemantics (r:Relation, nodes:NodeMap, db:Database) : Graph =
+ def relationSemantics(db:Database, nodemap:NodeMap, r:Relation):Graph =
/* flatMap.toSet assumes that no two triples from directT would be the same.
* We know this because relations with candidate keys are mapped to unique
* subjects, and potentially redundant rows get unique blank node subjects.
*/
- Graph(r.body flatMap { t => tupleSemantics(t, r, nodes, db) })
+ Graph(r.body flatMap { t => tupleSemantics(db, nodemap, r, t) })
- def tupleSemantics (t:Tuple, r:Relation, nodes:NodeMap, db:Database) : Set[Triple] = {
+ def tupleSemantics (db:Database, nodemap:NodeMap, r:Relation, t:Tuple):Set[Triple] = {
val s:Node =
+ // look for the first candidate key if available
r.candidates.headOption match {
- // Known to have at least one key, so take the first one.
+ // if there is a candidate key, we know we can retrieve the mapped node
+        // null values are OK at this point, since the NodeMap is keyed on cell values (not lexical values)
case Some(firstKey) => {
- val vs = t.notNullLexvalues(firstKey)
- nodes(r.name)(firstKey)(vs)
+ val cellvalues = t.cellvalues(firstKey)
+ nodemap(r.name)(firstKey)(cellvalues)
}
- /** Table has no candidate keys. */
+ // there is no candidate key, we have to come up with a new bnode
case None =>
NodeBNode(freshbnode())
}
- val refs = references(t, r) map { referenceSemantics(s, _, r, t, nodes) }
- val scals = scalars(t, r) flatMap { lexicalValueSemantics(r.name, s, _, r.header, t) }
+    // the foreign keys whose attributes are all non-null create triples
+ val triplesFromFKs = t.references(r) map { referenceSemantics(s, _, r, t, nodemap) }
+    // the lexical values (i.e. the non-null, non-foreign-key values) also create triples
+ val triplesFromLexicalValues = t.scalars(r) flatMap { lexicalValueSemantics(r, s, _, t) }
+    // the relation provenance is mapped to an RDF type, computed from the relation itself
val triple = Triple(SubjectNode(s),
PredicateIRI(IRI("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")),
ObjectNode(NodeIRI(IRI(UE(r)))))
- refs ++ scals + triple
+ triplesFromFKs ++ triplesFromLexicalValues + triple
}
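+  // e.g. a tuple ("ID" -> 18, "city" -> "Cambridge", "addr" -> null) of a keyed relation
+  // (illustrative names, addr being a unary foreign key) yields one rdf:type triple, one triple
+  // per non-null scalar column (here ID and city), and one triple per fully non-null foreign key
+  // (here none, since addr is null)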
-
- def lexicalValueSemantics (rn:RelName, s:Node, a:AttrName, h:Header, t:Tuple) : Option[Triple] = {
- val p = predicateSemantics (rn, new AttrList { val attrs = List(a) } )
+
+  /**
+   * maps the value of attribute a in tuple t to an RDF triple (if any),
+   * the object being derived from the cell value and the SQL datatype of a
+   */
+ def lexicalValueSemantics(r:Relation, s:Node, a:AttrName, t:Tuple):Option[Triple] = {
+ // a is implicitly promoted to an AttrList
+ val p = predicateSemantics(r, a)
val cellValue = t(a)
- val datatype = h.datatype(a)
+ val datatype = r.header(a)
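+    // e.g. a non-null STRING cell becomes a plain literal (see the case just below)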
(cellValue, datatype) match {
case (LexicalValue(l), Datatype.STRING) => {
val o = PlainLiteral(l, None)
@@ -121,21 +157,21 @@
}
def referenceSemantics (s:Node, fk:ForeignKey, r:Relation, t:Tuple, nodes:NodeMap) : Triple = {
- val p = predicateSemantics (r.name, fk)
- val ls:List[LexicalValue] = t.notNullLexvalues(fk)
+ val p = predicateSemantics (r, fk)
+ val cellvalues:List[CellValue] = t.cellvalues(fk)
val ForeignKey(as, Target(rel, key)) = fk
if (!(nodes isDefinedAt rel))
error("No referent relation \"" + rel + "\" to match " + r.name + t)
if (!(nodes(rel) isDefinedAt key))
error("Relation " + rel + " has no attributes (" + key + ") to match " + r.name + t)
- if (!(nodes(rel)(key) isDefinedAt ls))
- error("Relation " + rel + "(" + key + ") has no values " + ls + " to match " + r.name + t)
- val o:Object = ObjectNode(nodes(rel)(key)(ls))
+ if (!(nodes(rel)(key) isDefinedAt cellvalues))
+ error("Relation " + rel + "(" + key + ") has no values " + cellvalues + " to match " + r.name + t)
+ val o:Object = ObjectNode(nodes(rel)(key)(cellvalues))
Triple(SubjectNode(s), PredicateIRI(p), o)
}
-
- def predicateSemantics (rn:RelName, as:AttrList) : IRI =
- IRI(UE(rn) + "#" + as.attrs.mkString("_"))
+
+ def predicateSemantics (r:Relation, as:AttrList) : IRI =
+ IRI(UE(r) + "#" + as.attrs.mkString("_"))
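+  // e.g. for a relation named "Addresses" and the single attribute "city" (illustrative names),
+  // the predicate is roughly <Addresses#city>, assuming UE yields the percent-encoded relation
+  // name; multi-attribute lists are joined with "_"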
// TODO: aren't they already part of the RDF model?
def datatypeSemantics (d:Datatype) : IRI =
--- a/rdb/src/main/scala/RDB.scala Sat Feb 12 18:33:02 2011 -0500
+++ b/rdb/src/main/scala/RDB.scala Sat Feb 12 22:48:05 2011 -0500
@@ -5,68 +5,153 @@
*/
object RDB {
- case class Database (m:Map[RelName, Relation]) {
- def apply (rn:RelName) = m(rn)
- def keySet = m.keySet.toSet
+ /**
+ * a Database maps a relation name to the actual Relation
+ */
+ case class Database(private val m:Map[RelName, Relation]) extends PartialFunction[RelName, Relation] {
+ def apply(rn:RelName) = m(rn)
+ def isDefinedAt(rn:RelName) = m isDefinedAt rn
+ /** returns all the relation names */
+ def relNames:Set[RelName] = m.keySet.toSet
+    /** returns the names of all the relations with at least one candidate key */
def indexables = m collect { case (rn, r) if r.isIndexable => rn }
}
+
object Database {
def apply (l:Relation*):Database =
Database(l map { r => (r.name -> r) } toMap)
}
- case class Relation (name:RelName, header:Header, body:List[Tuple], candidates:List[CandidateKey], pk:Option[CandidateKey], fks:ForeignKeys) {
- // TODO: should be + instead of ++
- def ++ (t:Tuple):Relation = this.copy(body = body :+ t)
+ /**
+ * definition of a Relation
+   * pk should be seen as the selector extracting the unique primary key from candidates (if present)
+   * the current SQL parser ensures the pk is materialized in both places
+ */
+ case class Relation(name:RelName,
+ header:Header,
+ body:List[Tuple],
+ candidates:List[CandidateKey],
+ pk:Option[CandidateKey],
+ fks:ForeignKeys) {
+    /** adds a tuple to the body of the relation */
+ def +(t:Tuple):Relation = this.copy(body = body :+ t)
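+    // e.g. r + Tuple("ID" -> LexicalValue("19"), "city" -> ␀) returns a copy of r with one more
+    // tuple (illustrative attribute names)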
+ /** a relation is indexable if it has at least one candidate key */
def isIndexable:Boolean = candidates.nonEmpty
}
- case class Header (m:Map[AttrName, Datatype]) {
- def apply (a:AttrName) = m(a)
- def keySet = m.keySet.toSet
- def datatype (a:AttrName) : Datatype = m(a)
- def contains (a:AttrName) : Boolean = m contains a
+ case class RelName(n:String) {
+ override def toString = n
}
+
+ /**
+ * a Header maps an attribute name to a SQL datatype
+   * the set of attribute names can be extracted (it is not ordered)
+ */
+ case class Header(private val m:Map[AttrName, Datatype]) extends PartialFunction[AttrName, Datatype] {
+ def apply(a:AttrName) = m(a)
+ def isDefinedAt(a:AttrName) = m isDefinedAt a
+ def attrNames:Set[AttrName] = m.keySet.toSet
+ }
+
object Header {
def apply (s:(String, Datatype)*):Header =
Header(s map { case (name, datatype) => (AttrName(name), datatype) } toMap)
}
+ /**
+   * a Tuple maps an attribute name to a cell value
+   * the tuple does not carry the header information, so when that is needed
+   * it has to be retrieved from the relation
+   * in particular, the order of the attribute names is not known within the tuple
+ */
+ case class Tuple (private val m:Map[AttrName, CellValue]) extends PartialFunction[AttrName, CellValue] {
+ def apply (a:AttrName) = m(a)
+
+ def isDefinedAt(a:AttrName) = m isDefinedAt a
+
+ /**
+     * returns the lexical values corresponding to the attributes in as
+     * the order of the list is preserved
+     * we assume the corresponding cells are non-null
+     * in particular, it is safe to call this with a primary key
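+     * e.g. for a tuple whose ID cell is LexicalValue("18"), lexvalues(CandidateKey("ID"))
+     * is List(LexicalValue("18")) (illustrative attribute name)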
+ */
+ def lexvalues (as:AttrList /* forall a in as, a is in this Tuple */):List[LexicalValue] =
+ as.attrs map {
+ m(_) match {
+ case lexicalValue @ LexicalValue(_) => lexicalValue
+ case ␀ => error("this value MUST not be null")
+ }
+ }
+
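+    /** returns the cell values (possibly null) corresponding to the attributes in as, preserving order */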
+ def cellvalues (as:AttrList /* forall a in as, a is in this Tuple */):List[CellValue] =
+ as.attrs map { m(_) }
+
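+    /** the foreign keys of r none of whose attributes is null in this tuple */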
+ def references(r:Relation):Set[ForeignKey] = {
+ val nullAttributes:Set[AttrName] =
+ m collect { case (attrName, cellValue) if cellValue == ␀ => attrName } toSet
+
+ r.fks filter { case ForeignKey(as, _) => nullAttributes & as.toSet isEmpty }
+ }
+
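+    /** the non-null attributes of this tuple that are not covered by a unary foreign key of r */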
+ def scalars(r:Relation):Set[AttrName] = {
+ val notNullAttributes:Set[AttrName] =
+ m collect { case (attrName, cellValue) if cellValue != ␀ => attrName } toSet
+
+ notNullAttributes filterNot { attrName => r.fks definesActuallyUnaryFK attrName }
+ }
+ }
+
+ object Tuple {
+ def apply (s:(String, CellValue)*):Tuple =
+ Tuple(s map { case (name, cellValue) => (AttrName(name), cellValue) } toMap)
+ }
+
+ abstract class CellValue
+ case class LexicalValue(s:String) extends CellValue
+ case object ␀ extends CellValue
+
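+  /**
+   * the set of foreign keys declared on a relation
+   * e.g. ForeignKeys(List("addr") -> Target(RelName("Addresses"), CandidateKey("ID")))
+   * (illustrative names) declares a unary foreign key from addr to Addresses(ID)
+   */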
+ case class ForeignKeys (private val fks:Set[ForeignKey]) {
+ def has(as:AttrList) = fks exists { _.attrs == as }
+ def getFK(as:AttrList):ForeignKey = fks find { _.attrs == as } get
+ def unaryFKs:Set[ForeignKey] = fks filter { _.isUnary }
+ def definesActuallyUnaryFK(a:AttrName):Boolean = unaryFKs exists { _.attrs contains a }
+ def filter(p: ForeignKey => Boolean):Set[ForeignKey] = fks filter p
+ }
+
+ object ForeignKeys {
+ def apply (fks:(List[String], Target)*):ForeignKeys =
+ ForeignKeys(fks map { case (keys, target) => ForeignKey(keys map { AttrName(_) }, target)} toSet)
+ }
+
+ case class ForeignKey(attrs:List[AttrName], target:Target) extends AttrList
+
+ case class Target(rel:RelName, key:CandidateKey)
+
+ case class CandidateKey (attrs:List[AttrName]) extends AttrList
+
+ object CandidateKey {
+ def apply (l:String*):CandidateKey =
+ CandidateKey(l map { AttrName(_) } toList)
+ }
+
trait AttrList {
val attrs:List[AttrName]
def isUnary:Boolean = attrs.length == 1
}
- case class ForeignKey (attrs:List[AttrName], target:Target) extends AttrList {
- def toSet = attrs.toSet
+ object AttrList {
+ // it's always ok to automatically promote a single AttrName to an AttrList
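+    // e.g. this is what lets lexicalValueSemantics pass a bare AttrName to predicateSemantics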
+ implicit def promoteAttrsAsAttrList(a:AttrName) = new AttrList { val attrs = List[AttrName](a) }
}
- case class CandidateKey (attrs:List[AttrName]) extends AttrList
- object CandidateKey {
- def apply (l:String*):CandidateKey =
- CandidateKey(l.toList map { AttrName(_) })
+ case class AttrName(n:String) {
+ override def toString = n
}
- implicit def cc2list (cc:CandidateKey) = cc.attrs
-
- case class ForeignKeys (fks:Set[ForeignKey]) {
- def has(as:AttrList) = fks exists { _.attrs == as }
- def getFK(as:AttrList):ForeignKey = fks find { _.attrs == as } get
- def unaryFKs:Set[ForeignKey] = fks filter { _.isUnary }
- def definesActuallyUnaryFK(a:AttrName):Boolean = unaryFKs exists { _.attrs contains a }
- def filter(p: ForeignKey => Boolean):Set[ForeignKey] = fks filter p
- def -(as:AttrList):ForeignKeys = ForeignKeys(fks filter { _.attrs == as })
- }
-
- object ForeignKeys {
- def apply (fks:(List[String], Target)*):ForeignKeys =
- ForeignKeys(fks map { case (keys, target) => ForeignKey(keys map { AttrName(_) }, target)} toSet)
- }
-
- case class Target (rel:RelName, key:CandidateKey)
case class Datatype(name:String) {
override def toString = "/* " + name + " */"
}
+
object Datatype {
val CHAR = Datatype("Char")
val VARCHAR = Datatype("Varchar")
@@ -80,53 +165,4 @@
val DATETIME = Datatype("Datetime")
}
- // case class Tuple (m:Map[AttrName, CellValue]) {
- // def apply (a:AttrName) = m(a)
- // def lexvalue (a:AttrName) : CellValue = m(a)
- // def lexvaluesNoNulls (as:List[AttrName]) = as map { m(_).asInstanceOf[LexicalValue] }
- // def nullAttributes (h:Header) : Set[AttrName] = h.keySet filter { m(_) == ␀ }
- // }
- case class Tuple (private val m:Map[AttrName, CellValue]) extends PartialFunction[AttrName, CellValue] {
- def apply (a:AttrName) = m(a)
- def isDefinedAt(a:AttrName) = m isDefinedAt a
- // assumes that the AttrName does not correspond to null values
- // for example, it's ok to call it with PK attributes
- // { as | forall a in as, a in Tuple }
- def notNullLexvalues (as:AttrList):List[LexicalValue] =
- as.attrs map {
- m(_) match {
- case lexicalValue @ LexicalValue(_) => lexicalValue
- case ␀ => error("this value MUST not be null")
- }
- }
- def nullAttributes : Set[AttrName] = m collect { case (attrName, cellValue) if cellValue == ␀ => attrName } toSet
- def notNullAttributes : Set[AttrName] = m collect { case (attrName, cellValue) if cellValue != ␀ => attrName } toSet
- }
- object Tuple {
- def apply (s:(String, CellValue)*):Tuple =
- Tuple(s map { case (name, cellValue) => (AttrName(name), cellValue) } toMap)
- }
-
- abstract class CellValue
- case class LexicalValue (s:String) extends CellValue
- case object ␀ extends CellValue
-
- case class RelName(n:String) {
- override def toString = n
- }
- case class AttrName(n:String) {
- override def toString = n
- }
-
- def references (t:Tuple, r:Relation):Set[ForeignKey] = {
- val nulls:Set[AttrName] = t.nullAttributes
- val references = r.fks filter { case ForeignKey(as, _) => nulls & as.toSet isEmpty }
- references
- }
-
- def scalars (t:Tuple, r:Relation):Set[AttrName] = {
- val notNulls:Set[AttrName] = t.notNullAttributes
- notNulls filterNot { attrName => r.fks definesActuallyUnaryFK attrName }
- }
-
}
--- a/sql/src/main/scala/SQL.scala Sat Feb 12 18:33:02 2011 -0500
+++ b/sql/src/main/scala/SQL.scala Sat Feb 12 22:48:05 2011 -0500
@@ -196,7 +196,7 @@
case l~x => RDB.Database(l.foldLeft(Map[RDB.RelName, RDB.Relation]())((m, p) => {
p match {
case Create(rn:RDB.RelName, relation:RDB.Relation) => m + (rn -> relation)
- case Insert(rn:RDB.RelName, tuple:RDB.Tuple) => m + (rn -> (m(rn) ++ tuple)) // add the tuple
+ case Insert(rn:RDB.RelName, tuple:RDB.Tuple) => m + (rn -> (m(rn) + tuple)) // add the tuple
}
}))
}