--- a/.hgignore Mon Feb 21 14:11:37 2011 -0500
+++ b/.hgignore Sun Mar 13 16:47:10 2011 -0400
@@ -15,3 +15,4 @@
.\#*
^\.emacs\.desktop\.lock$
^\.emacs\.desktop$
+build.properties
--- a/directmapping-test/src/main/scala/DirectMappingTestSuite.scala Mon Feb 21 14:11:37 2011 -0500
+++ b/directmapping-test/src/main/scala/DirectMappingTestSuite.scala Sun Mar 13 16:47:10 2011 -0400
@@ -8,42 +8,6 @@
import org.scalatest.FunSuite
-trait FundamentalTest extends FunSuite with DirectMappingModule with RDFImplicits {
-
- import DirectMapping._
-
- test("NodeMap") {
-
- val ck1:CandidateKey = CandidateKey("name", "ssn")
- val ck2:CandidateKey = CandidateKey("ID")
- val v11:List[CellValue] = List(LexicalValue("bob"), LexicalValue("123"))
- val v21:List[CellValue] = List(LexicalValue("alice"), LexicalValue("8"))
- val v12:List[CellValue] = List(LexicalValue("18"))
- val v22:List[CellValue] = List(LexicalValue("23"))
- val s1:Node = NodeBNode(BNode("1"))
- val s2:Node = NodeBNode(BNode("2"))
- val data:Set[(List[(CandidateKey, List[CellValue])], Node)] =
- Set((List((ck1, v11),(ck2, v21)), s1),
- (List((ck1, v12),(ck2, v22)), s2))
- val test = data.foldLeft(KeyMap(Map[CandidateKey, Map[List[CellValue], Node]]()))((m, t) => m ++ (t._1, t._2))
-
- val goal:KeyMap = KeyMap(
- Map(ck1 -> Map(v11 -> s1,
- v12 -> s2),
- ck2 -> Map(v21 -> s1,
- v22 -> s2))
- )
- assert(goal === test)
- }
-
-}
-
-
-
-
-
-
-
trait DirectMappingTest extends FunSuite with RDFModel with RDFImplicits with DirectMappingModule with TurtleModule {
import DirectMapping._
@@ -52,38 +16,27 @@
val turtleParser = new TurtleParser { }
- def testDirectMapping(testName:String, db:Database, expectedGraph:Graph, respectHierarchy:Boolean):Unit =
+ def testDirectMapping(testName:String, db:Database, expectedGraph:Graph):Unit =
test(testName) {
- DirectMapping.HierarchyDetection = respectHierarchy
- val computedGraph = directDB(db)
- DirectMapping.HierarchyDetection = true
- DirectMapping.NextBNode = 97 // @@ call the "i'd like to reset my fresh variables to 0 so i can have predictable node names" function
+ val computedGraph = databaseSemantics(db)
assert (expectedGraph === computedGraph)
}
- def testDirectMapping(testName:String, dbFile:File, expectedGraphFile:File, respectHierarchy:Boolean):Unit = {
+ def testDirectMapping(testName:String, dbFile:File, expectedGraphFile:File):Unit = {
val db = SqlParser.toDB(dbFile)
val expectedGraph:Graph = turtleParser.toGraph(expectedGraphFile)
- testDirectMapping(testName, db, expectedGraph, respectHierarchy)
+ testDirectMapping(testName, db, expectedGraph)
}
def testDirectMappingSpec(testName:String):Unit =
testDirectMapping(testName,
new File("./sharedtestdata/directmappingspec/" + testName + ".sql"),
- new File("./sharedtestdata/directmappingspec/" + testName + ".ttl"),
- true)
-
- def testDirectMappingSpecNoHierarchy(testName:String):Unit =
- testDirectMapping(testName,
- new File("./sharedtestdata/directmappingspec/" + testName + ".sql"),
- new File("./sharedtestdata/directmappingspec/" + testName + ".ttl"),
- false)
+ new File("./sharedtestdata/directmappingspec/" + testName + ".ttl"))
def testDirectMapping(testName:String):Unit =
testDirectMapping(testName,
new File("./sharedtestdata/tests/" + testName + ".sql"),
- new File("./sharedtestdata/tests/" + testName + ".ttl"),
- false)
+ new File("./sharedtestdata/tests/" + testName + ".ttl"))
// 2 People 1 Addresses
testDirectMappingSpec("emp_addr")
@@ -99,8 +52,6 @@
// 1 People 1 Addresses 1 Offices 1 ExectutiveOffices
testDirectMappingSpec("hier_tabl")
- // Try that again without respecting hierarchies
- testDirectMappingSpecNoHierarchy("non-hier_tabl")
// !!! goes in different module
--- a/directmapping-test/src/test/scala/DirectMappingTest.scala Mon Feb 21 14:11:37 2011 -0500
+++ b/directmapping-test/src/test/scala/DirectMappingTest.scala Sun Mar 13 16:47:10 2011 -0400
@@ -1,7 +1,5 @@
package org.w3.directmapping
-import org.w3.rdf.ConcreteModel
+import org.w3.rdf.jena._
-class FundamentalTestWithRDF extends FundamentalTest with ConcreteModel
-
-class DirectMappingTestWithConcreteModel extends DirectMappingTest with ConcreteModel
+class DirectMappingTestWithConcreteModel extends DirectMappingTest with JenaModel
--- a/directmapping-webapp/src/main/scala/Servlet.scala Mon Feb 21 14:11:37 2011 -0500
+++ b/directmapping-webapp/src/main/scala/Servlet.scala Sun Mar 13 16:47:10 2011 -0400
@@ -293,7 +293,7 @@
try {
val db = SQLParser.toDB(sql)
DirectMapping.MinEncode = minEncode
- val computedGraph:Graph = directDB(db)
+ val computedGraph:Graph = databaseSemantics(db)
jenaSerializer(computedGraph)
} catch {
case e => e.getMessage
--- a/directmapping/src/main/scala/DirectMapping.scala Mon Feb 21 14:11:37 2011 -0500
+++ b/directmapping/src/main/scala/DirectMapping.scala Sun Mar 13 16:47:10 2011 -0400
@@ -9,205 +9,131 @@
lazy val DirectMapping = new DirectMapping { }
+ /**
+ * The mapping functions implementing
+ * <http://www.w3.org/2001/sw/rdb2rdf/directGraph/>
+ */
trait DirectMapping {
- /** A KeyMap associates the candidate key and key values with the
- * node for any tuple in a unique relation. */
- case class KeyMap(m:Map[CandidateKey, Map[List[CellValue], Node]]) {
- //def KeyMap() : KeyMap = KeyMap(Map[CandidateKey, Map[List[CellValue], Node]]())
- def apply(i:CandidateKey) : Map[List[CellValue], Node] = m(i)
- def ++(pairs:List[(CandidateKey, List[CellValue])], n:Node):KeyMap = {
- val m2:Map[CandidateKey, Map[List[CellValue], Node]] =
- pairs.foldLeft(m) { case (m, (ck, cellValues)) => {
- m.get(ck) match {
- case Some(byKey) if byKey.get(cellValues).isDefined =>
- error("tried to set " + ck + cellValues + " = " + n + "(was " + byKey(cellValues) + ")")
- case Some(byKey) =>
- m + (ck -> (byKey + (cellValues -> n)))
- case None =>
- m + (ck -> Map(cellValues -> n))
- }
- } }
- KeyMap(m2)
- }
- def contains(ck:CandidateKey) = m contains ck
- }
- case class NodeMap(m:Map[RelName, KeyMap]) {
- def apply(rn:RelName) = m(rn)
- def ultimateReferent (rn:RelName, k:CandidateKey, vs:List[LexicalValue], db:Database) : Node = {
- // Issue: What if fk is a rearrangement of the pk, per issue fk-pk-order?
- (HierarchyDetection, db(rn).pk) match {
- case (true, Some(pk)) if db(rn).fks contains(pk.attrs) => {
- /** Table's primary key is a foreign key. */
- val target = db(rn).fks(ForeignKeyKey(pk.attrs))
- ultimateReferent(target.rel, target.key, vs, db)
- }
- case (_, _) =>
- m(rn)(k)(vs)
- }
- }
- def contains(rn:RelName) = m.contains(rn)
- }
- implicit def list2map (l:Set[(RelName, KeyMap)]):Map[RelName,KeyMap] = l.toMap
- implicit def list2Nmap (l:Set[(RelName, KeyMap)]):NodeMap = NodeMap(l)
-
- /** The direct mapping requires one parameter: the StemIRI */
- case class StemIRI(stem:String) {
- def +(path:String):IRI = IRI(stem + path)
- }
-
- /**
- * Switch for special case for hierarchies:
- */
- var HierarchyDetection = true
-
- var MinEncode = true
-
/**
- * The mapping functions implementing
- * <http://www.w3.org/2001/sw/rdb2rdf/directGraph/>
+ * trick to reset the BNode generation, which is predictable using this function
+ * the RDF module should provide this functionality at some point
*/
-
- def references (t:Tuple, r:Relation):Set[ForeignKeyKey] = {
- val allFKs:Set[ForeignKeyKey] = r.fks.keySet
- val nulllist:Set[AttrName] = t.nullAttributes(r.header)
- val nullFKs:Set[ForeignKeyKey] = allFKs filter { fk => (nulllist & fk.toSet) nonEmpty }
-
- /** Check to see if r's primary key is a hierarchical key.
- * http://www.w3.org/2001/sw/rdb2rdf/directGraph/#rule3 */
- (HierarchyDetection, r.pk) match {
- case (true, Some(pk)) if r.fks contains (pk.attrs) =>
- r.fks.keySet -- nullFKs - ForeignKeyKey(r.fks.refdAttrs(ForeignKeyKey(pk.attrs)))
- case (_, _) =>
- r.fks.keySet -- nullFKs
- }
- }
-
- def scalars (t:Tuple, r:Relation):Set[AttrName] = {
- val allAttrs:Set[AttrName] = r.header.keySet
- val allFKs:Set[ForeignKeyKey] = r.fks.keySet
- val unaryFKs:Set[AttrName] = allFKs map { _.attrs } filter { _.length == 1 } flatten
-
- /** Check to see if r's primary key is a hierarchical key.
- * http://www.w3.org/2001/sw/rdb2rdf/directGraph/#rule3 */
- (HierarchyDetection, r.pk) match {
- case (true, Some(pk)) if r.fks contains (pk.attrs) =>
- allAttrs -- unaryFKs ++ r.fks.refdAttrs(ForeignKeyKey(pk.attrs))
- case (_, _) =>
- allAttrs -- unaryFKs
- }
- }
-
- /** The NodeMap-generating functions: */
- def relation2KeyMap (r:Relation) : KeyMap = {
- val m = KeyMap(Map[CandidateKey, Map[List[CellValue], Node]]())
- r.body.foldLeft(m) { (m, t) => {
- val (pairs, node) = rdfNodeForTuple(t, r)
- m ++ (pairs, node)
- } }
- }
-
- def rdfNodeForTuple (t:Tuple, r:Relation) : (List[(CandidateKey, List[CellValue])], Node) = {
- val s:Node =
- r.pk match {
- case Some(pk) =>
- /** Table has a primkary key. */
- NodeIRI(nodemap(r.name, pk.attrs, t.lexvaluesNoNulls(pk.attrs)))
- case None =>
- /** Table has no primkary key (but has some candidate keys). */
- NodeBNode(freshbnode())
- }
- (r.candidates map { k => (k, k.attrs map { t(_) }) }, s)
- }
-
- /** The triples-generating functions start with databasemap: */
- def directDB (db:Database) : Graph = {
- val idxables = db.keySet filter { rn => db(rn).candidates nonEmpty }
- val nodeMap:NodeMap = idxables map { rn => rn -> relation2KeyMap(db(rn)) }
- Graph(db.keySet flatMap { (rn:RelName) => directR(db(rn), nodeMap, db) })
- }
-
- def directR (r:Relation, nodes:NodeMap, db:Database) : Graph =
- /* flatMap.toSet assumes that no two triples from directT would be the same.
- * We know this because relations with candidate keys are mapped to unique
- * subjects, and potentially redundant rows get unique blank node subjects.
- */
- Graph(r.body flatMap { t => directT(t, r, nodes, db) })
-
- def directT (t:Tuple, r:Relation, nodes:NodeMap, db:Database) : Set[Triple] = {
- val s:Node =
- r.candidates.headOption match {
- // Known to have at least one key, so take the first one.
- case Some(firstKey) => {
- val vs = t.lexvaluesNoNulls(firstKey.attrs)
- nodes.ultimateReferent(r.name, firstKey, vs, db)
- }
- /** Table has no candidate keys. */
- case None =>
- NodeBNode(freshbnode())
- }
- directS(s, t, r, nodes, db)
- }
-
- def directS (s:Node, t:Tuple, r:Relation, nodes:NodeMap, db:Database) : Set[Triple] = {
- ( references(t, r) map { directN(s, _, r, t, nodes) } ) ++
- ( scalars(t, r) flatMap { directL(r.name, s, _, r.header, t) } ) +
- Triple(SubjectNode(s),
- PredicateIRI(IRI("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")),
- ObjectNode(NodeIRI(IRI(UE(relName2string(r.name))))))
- }
-
- // should be done by BNode
- var NextBNode = 97
+ private var NextBNode = 97
def freshbnode () : BNode = {
val ret = NextBNode
NextBNode = NextBNode + 1
BNode(ret.toChar.toString)
}
+
+ /**
+ * builds one tuple-to-node partial function per relation and chains them with orElse (reduceLeft throws if the database has no relations)
+ */
+ def dbToTupleMap(db:Database):PartialFunction[Tuple, Node] =
+ db.relations map { relation => tupleMapForRelation(relation) } reduceLeft { _ orElse _ }
+
+ var MinEncode = true
+
+ /**
+ * maps every tuple in the body of r to its RDF node: an IRI built from the primary key if r has one, a fresh blank node otherwise
+ */
+ def tupleMapForRelation(r:Relation):PartialFunction[Tuple, Node] = {
+ def tupleToNode(t:Tuple):Node =
+ r.pk match {
+ // Table has a primary key
+ case Some(pk) => NodeIRI(tupleToIRI(t, pk))
+ // Table has no primary key
+ case None => NodeBNode(freshbnode())
+ }
+ r.body map { t => t -> tupleToNode(t) } toMap
+ }
+
+ /**
+ * Main function expressing the RDF semantics of a SQL database
+ */
+ def databaseSemantics(db:Database):Graph = {
+ // restart blank node labels on every call; mutating this shared counter makes the implementation not thread-safe
+ NextBNode = 97
+ val tuplemap = dbToTupleMap(db)
+ Graph(db.relations flatMap { r => relationSemantics(tuplemap)(r) })
+ }
- def directL (rn:RelName, s:Node, a:AttrName, h:Header, t:Tuple) : Option[Triple] = {
- val p = predicatemap (rn, List(a))
- t.lexvalue(a) match {
- case l:LexicalValue => {
- val o = literalmap(l, h.sqlDatatype(a))
- Some(Triple(SubjectNode(s),
- PredicateIRI(p),
- ObjectLiteral(o)))
- }
- case ␀ => None
- }
+ def relationSemantics(tuplemap:PartialFunction[Tuple, Node])(r:Relation):Graph =
+ Graph(r.body flatMap { t => tupleSemantics(tuplemap)(t) })
+
+ def tupleSemantics (tuplemap:PartialFunction[Tuple, Node])(t:Tuple):Set[Triple] = {
+ val s:SubjectNode = SubjectNode(tuplemap(t))
+ val poFromFKs = t.references map { fk => referenceSemantics(tuplemap)(t, fk) }
+ val poFromLexicalValues = t.scalars flatMap { a => lexicalValueSemantics(t, a) }
+ val poFromRelation = (PredicateIRI(IRI("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")),
+ ObjectNode(NodeIRI(IRI(UE(t.relation)))))
+ (poFromFKs ++ poFromLexicalValues + poFromRelation) map { case (p, o) => Triple(s, p, o) }
}
- def directN (s:Node, as:ForeignKeyKey, r:Relation, t:Tuple, nodes:NodeMap) : Triple = {
- val p = predicatemap (r.name, as.attrs)
- val ls:List[LexicalValue] = t.lexvaluesNoNulls(as.attrs)
- val target = r.fks(as)
- if (!nodes.contains(target.rel))
- error("No referent relation \"" + target.rel + "\" to match " + r.name + t)
- if (!nodes(target.rel).contains(target.key))
- error("Relation " + target.rel + " has no attributes (" + target.key + ") to match " + r.name + t)
- if (!nodes(target.rel)(target.key).contains(ls))
- error("Relation " + target.rel + "(" + target.key + ") has no values " + ls + " to match " + r.name + t)
- val o:Object = ObjectNode(nodes(target.rel)(target.key)(ls))
- Triple(SubjectNode(s), PredicateIRI(p), o)
- }
-
- // These implicits make nodemap and predicatemap functions prettier.
- implicit def relName2string (rn:RelName) = rn.n
- implicit def attrName2string (rn:AttrName) = rn.n
-
- def nodemap (rn:RelName, as:List[AttrName], ls:List[LexicalValue]) : IRI = {
- val pairs:List[String] = as.zip(ls) map { case (attrName, lexicalValue) => UE(attrName) + "=" + UE(lexicalValue.s) }
- IRI(UE(rn) + "/" + pairs.mkString(",") + "#_")
- }
-
- def predicatemap (rn:RelName, as:List[AttrName]) : IRI = {
- val encoded = as.map {c => UE(c)} mkString(",")
- IRI(UE(rn) + "#" + encoded)
+ /**
+ * a foreign key contributes a (predicate, object) pair whose object is the node of the referenced tuple
+ */
+ def referenceSemantics (tuplemap:PartialFunction[Tuple, Node])(t:Tuple, fk:ForeignKey):(Predicate, Object) = {
+ val p = referencePredicateSemantics(t.relation, fk)
+ val o = ObjectNode(tuplemap(t.dereference(fk)))
+ (PredicateIRI(p), o)
}
+ /**
+ * a non-null cell contributes one (predicate, object) pair: a plain literal for STRING, a typed literal otherwise; a null cell contributes nothing
+ */
+ def lexicalValueSemantics(t:Tuple, a:AttrName):Option[(Predicate, Object)] = {
+ val r:Relation = t.relation
+ // the predicate IRI is built from the relation and the attribute name
+ val p = lexicalValuePredicateSemantics(r, a)
+ val cellValue = t(a)
+ val datatype = r.header(a)
+ (cellValue, datatype) match {
+ case (LexicalValue(l), Datatype.STRING) => {
+ val o = PlainLiteral(l, None)
+ Some(PredicateIRI(p), ObjectLiteral(o))
+ }
+ case (LexicalValue(l), d) => {
+ val o = TypedLiteral(l, datatypeSemantics(d))
+ Some(PredicateIRI(p), ObjectLiteral(o))
+ }
+ case (␀, _) => None
+ }
+
+ }
+
+ /**
+ * the generated IRI has to be "parsable" for a reverse mapping, so the attribute name is encoded with UE like the relation name
+ */
+ def lexicalValuePredicateSemantics(r:Relation, a:AttrName):IRI =
+ IRI(UE(r) + "#" + UE(a))
+
+ /**
+ * the generated IRI has to be "parsable" for a reverse mapping, so each attribute name is encoded with UE before joining with "_"
+ */
+ def referencePredicateSemantics(r:Relation, as:AttrList):IRI =
+ IRI(UE(r) + "#" + as.attrs.map { a => UE(a) }.mkString("_"))
+
+ /**
+ * the generated IRI has to be "parsable" for a reverse mapping
+ * this function must generate a different IRI for each tuple
+ * we know (not enforced by a type) that
+ * - pk is actually a primary key
+ * - the lexical values are generated from pk
+ * hence, the zip operation is safe as both lists have the same size
+ */
+ def tupleToIRI(t:Tuple, pk:AttrList):IRI = {
+ val r:Relation = t.relation
+ val ls:List[LexicalValue] = t.lexvalues(pk)
+ val pairs:List[String] = pk.attrs zip ls map { case (attrName, lexicalValue) => UE(attrName) + "." + UE(lexicalValue.s) }
+ IRI(UE(r) + "/" + pairs.mkString("_") + "#_")
+ }
+
+ // IRI(UE(rn) + "#" + encoded)
+ // }
+
// TODO: aren't they already part of the RDF model?
- def XSD (d:Datatype) : IRI =
+ def datatypeSemantics (d:Datatype) : IRI =
d match {
case Datatype.INTEGER => IRI("http://www.w3.org/2001/XMLSchema#integer")
case Datatype.FLOAT => IRI("http://www.w3.org/2001/XMLSchema#float")
@@ -218,13 +144,7 @@
case Datatype.VARCHAR => IRI("http://www.w3.org/2001/XMLSchema#varchar")
case Datatype.STRING => IRI("http://www.w3.org/2001/XMLSchema#string")
}
-
- def literalmap (l:LexicalValue, d:Datatype) : Literal =
- d match {
- case Datatype.STRING => PlainLiteral(l.s, None)
- case _ => TypedLiteral(l.s, XSD(d))
- }
-
+
def UE (s:String) : String = {
if (MinEncode) {
s.replaceAll("%", "%25")
@@ -240,7 +160,9 @@
r
}
}
-
+ def UE(r:Relation):String = UE(r.name.n)
+ def UE(a:AttrName):String = UE(a.n)
+
}
}
--- a/jena/src/main/scala/JenaModel.scala Mon Feb 21 14:11:37 2011 -0500
+++ b/jena/src/main/scala/JenaModel.scala Sun Mar 13 16:47:10 2011 -0400
@@ -23,13 +23,18 @@
case class IRI(iri:String) { override def toString = '"' + iri + '"' }
object IRI extends Isomorphic1[String, IRI]
- class Graph(val jenaGraph:JenaGraph) extends Iterable[Triple] {
+ class Graph(val jenaGraph:JenaGraph) extends GraphLike {
def iterator:Iterator[Triple] = new Iterator[Triple] {
val iterator = jenaGraph.find(JenaNode.ANY, JenaNode.ANY, JenaNode.ANY)
def hasNext = iterator.hasNext
def next = iterator.next
}
+ def ++(other:Graph):Graph = new Graph(new com.hp.hpl.jena.graph.compose.Union(jenaGraph, other.jenaGraph))
+ // {
+ // for(triple <- other) jenaGraph add triple
+ // this
+ // }
override def equals(o:Any):Boolean = ( o.isInstanceOf[Graph] && jenaGraph.isIsomorphicWith(o.asInstanceOf[Graph].jenaGraph) )
}
--- a/project/build/RDB2RDF.scala Mon Feb 21 14:11:37 2011 -0500
+++ b/project/build/RDB2RDF.scala Sun Mar 13 16:47:10 2011 -0400
@@ -34,7 +34,7 @@
lazy val sharedtestdata = project("sharedtestdata", "sharedtestdata", new SharedTestData(_), rdb, rdf, sql, turtle)
lazy val directmapping = project("directmapping", "directmapping", new DirectMapping(_), rdb, rdf, sql, sharedtestdata)
lazy val directmappingWebapp = project("directmapping-webapp", "directmapping-webapp", new DirectMappingWebapp(_), directmapping, jena)
- lazy val directmappingTest = project("directmapping-test", "directmapping-test", new DirectMappingTest(_), directmapping)
+ lazy val directmappingTest = project("directmapping-test", "directmapping-test", new DirectMappingTest(_), directmapping, jena)
lazy val sparql = project("sparql", "sparql", new SPARQL(_), rdf)
lazy val sparql2sql = project("sparql2sql", "sparql2sql", new SPARQL2SQL(_), sparql, sql)
lazy val sparql2sparql = project("sparql2sparql", "sparql2sparql", new SPARQL2SPARQL(_), sparql)
--- a/rdb/src/main/scala/RDB.scala Mon Feb 21 14:11:37 2011 -0500
+++ b/rdb/src/main/scala/RDB.scala Sun Mar 13 16:47:10 2011 -0400
@@ -5,59 +5,212 @@
*/
object RDB {
- case class Database (m:Map[RelName, Relation]) {
- def apply (rn:RelName) = m(rn)
- def keySet = m.keySet.toSet
+ /**
+ * a Database defines a set of relations
+ */
+ case class Database(private val m:Map[RelName, Relation]) {
+ // tricky and evil
+ // when the Database is created, it goes through all the foreign keys and sets the db field of each target
+ // this way, a Target can reach its Relation directly without being handed the Database
+ for {
+ (_, r) <- m
+ fk <- r.fks
+ } {
+ fk.target.db = Some(this)
+ }
+
+ def apply(rn:RelName) = m(rn)
+ /** returns all the relations in this database */
+ def relations:Set[Relation] = m.values.toSet
+ }
+
object Database {
def apply (l:Relation*):Database =
Database(l map { r => (r.name -> r) } toMap)
}
- case class Relation (name:RelName, header:Header, body:List[Tuple], candidates:List[CandidateKey], pk:Option[CandidateKey], fks:ForeignKeys) {
- // TODO: should be + instead of ++
- def ++ (t:Tuple):Relation = this.copy(body = body :+ t)
+ /**
+ * definition of a Relation; on construction each tuple of body gets a back-reference to this relation and a 1-based rowId
+ * pk should be seen as the function extracting the unique Primary Key from candidates (if present)
+ * the current SQL parser ensures the pk is materialized twice
+ */
+ case class Relation(name:RelName,
+ header:Header,
+ body:List[Tuple],
+ candidates:List[CandidateKey],
+ pk:Option[CandidateKey],
+ fks:ForeignKeys) {
+ var rowCounter = 0
+ for {
+ t <- body
+ } {
+ t._relation = Some(this)
+ rowCounter = rowCounter + 1
+ t.rowId = rowCounter
+ }
+
+ /** returns a copy of this relation with t appended to the body */
+ def +(t:Tuple):Relation = this.copy(body = body :+ t)
+ }
- case class Header (m:Map[AttrName, Datatype]) {
- def apply (a:AttrName) = m(a)
- def keySet = m.keySet.toSet
- def sqlDatatype (a:AttrName) : Datatype = m(a)
- def contains (a:AttrName) : Boolean = m contains a
+ case class RelName(n:String) {
+ override def toString = n
}
+
+ /**
+ * a Header maps an attribute name to a SQL datatype
+ * attribute names are not ordered; the map is private, so lookups go through apply and isDefinedAt
+ */
+ case class Header(private val m:Map[AttrName, Datatype]) extends PartialFunction[AttrName, Datatype] {
+ def apply(a:AttrName) = m(a)
+ def isDefinedAt(a:AttrName) = m isDefinedAt a
+ }
+
object Header {
def apply (s:(String, Datatype)*):Header =
Header(s map { case (name, datatype) => (AttrName(name), datatype) } toMap)
}
- type AttrList = List[AttrName]
- case class ForeignKeyKey (attrs:AttrList) {
- def toSet = attrs.toSet
+ /**
+ * a tuple maps an attribute name to a value in a cell
+ * the tuple does not carry the information from the header so when it's needed,
+ * this has to be retrieved from the relation
+ * especially, the order of the attribute names is not known within the tuple
+ */
+ case class Tuple(private val m:Map[AttrName, CellValue],
+ var rowId:Int = scala.util.Random.nextInt())
+ extends PartialFunction[AttrName, CellValue] {
+
+ override def toString = m.toString
+
+ // hack for the tuple to know the relation it belongs to
+ var _relation:Option[Relation] = None
+ lazy val relation:Relation = _relation.get
+
+ def apply (a:AttrName) = m(a)
+
+ def isDefinedAt(a:AttrName) = m isDefinedAt a
+
+ /**
+ * returns all the lexical values corresponding to the given as
+ * the order from the list is preserved
+ * we assume the values are restricted to non null cells
+ * so it's safe to call only with a primary key
+ */
+ def lexvalues (as:AttrList /* forall a in as, a is in this Tuple */):List[LexicalValue] =
+ as.attrs map {
+ m(_) match {
+ case lexicalValue @ LexicalValue(_) => lexicalValue
+ case ␀ => error("this value MUST not be null")
+ }
+ }
+
+ /**
+ * returns all the cell values (null cells included) corresponding to the given as
+ * the order from the list is preserved
+ */
+ def cellvalues (as:AttrList /* forall a in as, a is in this Tuple */):List[CellValue] =
+ as.attrs map { m(_) }
+
+ /**
+ * returns all foreign keys that have only not-null values in it
+ */
+ def references:Set[ForeignKey] = {
+ val nullAttributes:Set[AttrName] =
+ m collect { case (attrName, cellValue) if cellValue == ␀ => attrName } toSet
+
+ relation.fks filter { case ForeignKey(as, _) => nullAttributes & as.toSet isEmpty }
+ }
+
+ /**
+ * returns all the not null attribute names
+ * such that they don't also define a unary foreign key
+ */
+ def scalars:Set[AttrName] = {
+ val notNullAttributes:Set[AttrName] =
+ m collect { case (attrName, cellValue) if cellValue != ␀ => attrName } toSet
+
+ notNullAttributes filterNot { attrName => relation.fks definesActuallyUnaryFK attrName }
+ }
+ /** returns the tuple of fk's target relation whose target-key cells equal this tuple's fk cells; the final get throws if there is none */
+ def dereference(fk:ForeignKey):Tuple = {
+ val values = this.cellvalues(fk)
+ val targetRelation:Relation = fk.target.relation
+ targetRelation.body find { t => values == t.cellvalues(fk.target.key) } get
+ }
+
}
- case class CandidateKey (attrs:AttrList)
+ object Tuple {
+ def apply (s:(String, CellValue)*):Tuple =
+ Tuple(s map { case (name, cellValue) => (AttrName(name), cellValue) } toMap)
+ }
+
+ /**
+ * a cell value has either a lexical value or the NULL value
+ */
+ abstract class CellValue
+ case class LexicalValue(s:String) extends CellValue
+ case object ␀ extends CellValue
+
+ /**
+ * ForeignKeys abstracts a set of foreign keys
+ * only the operations callers need are exposed; the underlying set is private
+ */
+ case class ForeignKeys (private val fks:Set[ForeignKey]) {
+ def definesActuallyUnaryFK(a:AttrName):Boolean = fks filter { _.isUnary } exists { _.attrs contains a }
+ def filter(p: ForeignKey => Boolean):Set[ForeignKey] = fks filter p
+ def foreach(f:ForeignKey => Unit) = fks foreach f
+ // for sparql2sql; targetOf ends in head, so it throws if no foreign key has exactly these attributes
+ def contains(a:AttrList):Boolean = fks exists { fk => fk.attrs == a.attrs }
+ def targetOf(as:AttrList):Target = fks collect { case ForeignKey(attrs, target) if as.attrs == attrs => target } head
+ }
+
+ object ForeignKeys {
+ def apply (fks:(List[String], Target)*):ForeignKeys =
+ ForeignKeys(fks map { case (keys, target) => ForeignKey(keys map { AttrName(_) }, target)} toSet)
+ }
+
+ trait AttrList {
+ val attrs:List[AttrName]
+ /**
+ * by definition, a key is unary if it has only one attribute
+ */
+ def isUnary:Boolean = attrs.length == 1
+ // for sparql2sql
+ def apply(index:Int):AttrName = attrs(index)
+ }
+
+ object AttrList {
+ def apply(a:AttrName):AttrList = new AttrList { val attrs = List(a) }
+ def apply(as:List[AttrName]):AttrList = new AttrList { val attrs = as }
+ }
+
+ case class ForeignKey(attrs:List[AttrName], target:Target) extends AttrList
+
+ case class Target(rn:RelName, key:CandidateKey) {
+ // hack: this field is set by the Database when it is created,
+ // as the target is only fully known at that moment
+ var db:Option[Database] = None
+ lazy val relation:Relation = db.get(rn)
+ }
+
+ case class CandidateKey (attrs:List[AttrName]) extends AttrList
+
object CandidateKey {
def apply (l:String*):CandidateKey =
- CandidateKey(l.toList map { AttrName(_) })
- }
- implicit def cc2list (cc:CandidateKey) = cc.attrs
-
- case class ForeignKeys (m:Map[ForeignKeyKey, Target]) {
- def apply (l:ForeignKeyKey) = m(l)
- def keySet = m.keySet.toSet
- def contains (l:AttrList) = m contains ForeignKeyKey(l) // self-promoting cheat
- def refdAttrs (kk:ForeignKeyKey) = m(kk).key.attrs
- }
- object ForeignKeys {
- def apply (s:(List[String], Target)*):ForeignKeys =
- ForeignKeys(s map { case (keys, target) => (ForeignKeyKey(keys map { AttrName(_) }), target)} toMap)
+ CandidateKey(l map { AttrName(_) } toList)
}
- case class Target (rel:RelName, key:CandidateKey)
+ case class AttrName(n:String) {
+ override def toString = n
+ }
case class Datatype(name:String) {
override def toString = "/* " + name + " */"
}
+
+ // TODO: use Enumeration so we can go through the datatypes?
object Datatype {
val CHAR = Datatype("Char")
val VARCHAR = Datatype("Varchar")
@@ -71,26 +224,4 @@
val DATETIME = Datatype("Datetime")
}
- case class Tuple (m:Map[AttrName, CellValue]) {
- def apply (a:AttrName) = m(a)
- def lexvalue (a:AttrName) : CellValue = m(a)
- def lexvaluesNoNulls (as:List[AttrName]) = as map { m(_).asInstanceOf[LexicalValue] }
- def nullAttributes (h:Header) : Set[AttrName] = h.keySet filter { m(_) == ␀ }
- }
- object Tuple {
- def apply (s:(String, CellValue)*):Tuple =
- Tuple(s map { case (name, cellValue) => (AttrName(name), cellValue) } toMap)
- }
-
- abstract class CellValue
- case class LexicalValue (s:String) extends CellValue
- case object ␀ extends CellValue
-
- case class RelName(n:String) {
- override def toString = n
- }
- case class AttrName(n:String) {
- override def toString = n
- }
-
}
--- a/rdf/src/main/scala/RDF.scala Mon Feb 21 14:11:37 2011 -0500
+++ b/rdf/src/main/scala/RDF.scala Sun Mar 13 16:47:10 2011 -0400
@@ -5,7 +5,10 @@
trait Model {
type IRI
- type Graph <: Iterable[Triple]
+ trait GraphLike extends Iterable[Triple] { self =>
+ def ++(other:Graph):Graph
+ }
+ type Graph <: GraphLike
type Triple
type BNode
type Node
@@ -80,11 +83,14 @@
case class IRI(iri:String) { override def toString = '"' + iri + '"' }
object IRI extends Isomorphic1[String, IRI]
- type Graph = Set[Triple]
+ case class Graph(triples:Set[Triple]) extends GraphLike {
+ def iterator = triples.iterator
+ def ++(other:Graph):Graph = Graph(triples ++ other.triples)
+ }
object Graph extends GraphObject {
- def empty:Graph = Set[Triple]()
- def apply(elems:Triple*):Graph = Set[Triple](elems:_*)
- def apply(it:Iterable[Triple]):Graph = it.toSet
+ def empty:Graph = Graph(Set[Triple]())
+ def apply(elems:Triple*):Graph = Graph(Set[Triple](elems:_*))
+ def apply(it:Iterable[Triple]):Graph = Graph(it.toSet)
}
case class Triple (s:Subject, p:Predicate, o:Object)
--- a/sharedtestdata/directmappingspec/hier_tabl.ttl Mon Feb 21 14:11:37 2011 -0500
+++ b/sharedtestdata/directmappingspec/hier_tabl.ttl Sun Mar 13 16:47:10 2011 -0400
@@ -1,12 +1,12 @@
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
-<Addresses/ID.18#_> a <ExecutiveOffices> .
-<Addresses/ID.18#_> <ExecutiveOffices#ID> 18 .
-<Addresses/ID.18#_> <ExecutiveOffices#desk> "oak" .
+<ExecutiveOffices/ID.18#_> a <ExecutiveOffices> .
+<ExecutiveOffices/ID.18#_> <ExecutiveOffices#ID> <Offices/ID.18#_> .
+<ExecutiveOffices/ID.18#_> <ExecutiveOffices#desk> "oak" .
-<Addresses/ID.18#_> a <Offices> .
-<Addresses/ID.18#_> <Offices#ID> 18 .
-<Addresses/ID.18#_> <Offices#building> 32 .
-<Addresses/ID.18#_> <Offices#ofcNumber> "G528" .
+<Offices/ID.18#_> a <Offices> .
+<Offices/ID.18#_> <Offices#ID> <Addresses/ID.18#_> .
+<Offices/ID.18#_> <Offices#building> 32 .
+<Offices/ID.18#_> <Offices#ofcNumber> "G528" .
<People/ID.7#_> a <People> .
<People/ID.7#_> <People#ID> 7 .
--- a/sharedtestdata/directmappingspec/hier_tabl_proto.ttl Mon Feb 21 14:11:37 2011 -0500
+++ b/sharedtestdata/directmappingspec/hier_tabl_proto.ttl Sun Mar 13 16:47:10 2011 -0400
@@ -1,8 +1,8 @@
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
-<Addresses/ID.18#_> a <Offices> .
-<Addresses/ID.18#_> <Offices#ID> 18 .
-<Addresses/ID.18#_> <Offices#building> 32 .
-<Addresses/ID.18#_> <Offices#ofcNumber> "G528" .
+<Offices/ID.18#_> a <Offices> .
+<Offices/ID.18#_> <Offices#ID> <Addresses/ID.18#_> .
+<Offices/ID.18#_> <Offices#building> 32 .
+<Offices/ID.18#_> <Offices#ofcNumber> "G528" .
<People/ID.7#_> a <People> .
<People/ID.7#_> <People#ID> 7 .
--- a/sparql2sql/src/main/scala/SparqlToSql.scala Mon Feb 21 14:11:37 2011 -0500
+++ b/sparql2sql/src/main/scala/SparqlToSql.scala Sun Mar 13 16:47:10 2011 -0400
@@ -293,11 +293,12 @@
case Some(rdb.RDB.CandidateKey(List(rdb.RDB.AttrName(constrainMe.attribute.n)))) => RDFNoder(rel, boundTo)
case _ => {
val asKey = List(constrainMe.attribute)
- if (reldesc.fks.contains(rdb.RDB.CandidateKey(asKey))) { // !! (0)
+ if (reldesc.fks contains rdb.RDB.CandidateKey(asKey)) { // !! (0)
/** varConstraint(R_patient, Some(SexDE), VarAssignable(?_0_sexEntry), Person) -> RDFNoder(Person,FullBinding(R_patient.SexDE)) */
- val rdb.RDB.Target(fkrel, fkattr) = reldesc.fks(rdb.RDB.ForeignKeyKey(asKey))
+ // WTF? I don't see where the following is used
+ // val rdb.RDB.Target(fkrel, fkattr) = reldesc.fks(rdb.RDB.ForeignKeyKey(asKey))
RDFNoder(rel, boundTo)
- } else if (reldesc.header.contains(constrainMe.attribute)) {
+ } else if (reldesc.header isDefinedAt constrainMe.attribute) {
reldesc.header(constrainMe.attribute) match {
/** varConstraint(R__0_indicDE, Some(NDC), VarAssignable(?_0_indicNDC), Medication_DE) -> IntMapper(FullBinding(R__0_indicDE.NDC)) */
case rdb.RDB.Datatype("Int") => IntMapper(boundTo)
@@ -434,25 +435,25 @@
* fkrel.fkattr (e.g. Employee.manager) may be a foreign key.
* Calculate final relvarattr and relation.
*/
- val asKey = List(attr)
+ val asKey = rdb.RDB.AttrList(attr)
val (targetattr:sql.RelVarAttr, targetrel, dt, state_fkeys:R2RState) =
- if (db(rel).fks.contains(rdb.RDB.CandidateKey(asKey))) { // !! (0)
- val rdb.RDB.Target(fkrel, fkattr) = db(rel).fks(rdb.RDB.ForeignKeyKey(asKey))
- try { db(fkrel).header(fkattr.attrs(0)) } catch { // !! (0)
+ if (db(rel).fks contains asKey) { // !! (0)
+ val rdb.RDB.Target(targetRel, targetCK) = db(rel).fks targetOf asKey
+ try { db(targetRel).header(targetCK(0)) } catch { // !! (0) look up (not isDefinedAt, whose Boolean was discarded) so a missing target attribute raises
/** Foreign key relation.attribute was not found in the database description. */
case e:java.util.NoSuchElementException =>
- throw new Exception("db(" + fkrel + ").header(" + fkattr + ") not found in " + db)
+ throw new Exception("db(" + targetRel + ").header(" + targetCK + ") not found in " + db)
}
val fkdt =
- if (db(fkrel).fks.contains(fkattr)) {
+ if (db(targetRel).fks contains targetCK) {
/** Foreign key to something which is a foreign key. May have use
* cases, but signal error until we figure out that they are. */
- val rdb.RDB.Target(dfkrel, dfkattr) = db(fkrel).fks(rdb.RDB.ForeignKeyKey(fkattr.attrs))
+ val rdb.RDB.Target(dfkrel, dfkattr) = db(targetRel).fks targetOf targetCK
error("foreign key " + rel.n + "." + attr.n +
- "->" + fkrel.n + "." + fkattr.attrs(0).n + // !! (0)
- "->" + dfkrel.n + "." + dfkattr.attrs(0).n) // !! (0)
+ "->" + targetRel.n + "." + targetCK(0).n + // !! (0)
+ "->" + dfkrel.n + "." + dfkattr(0).n) // !! (0)
} else
- db(fkrel).header(fkattr(0)) // !! (0)
+ db(targetRel).header(targetCK(0)) // !! (0)
if (enforceForeignKeys) {
/**
* Create an extra join on the foreign key relvar. For instance,
@@ -462,13 +463,13 @@
* and bind targetattr:R_who.empid. targetrel:Employee .
*/
val oRelVar = relVarFromTerm(o)
- val fkaliasattr = sql.RelVarAttr(oRelVar, fkattr.attrs(0)) // !! (0)
- val state_t = R2RState(state_subjJoin.joins + sql.InnerJoin(sql.AliasedResource(fkrel,oRelVar), None),
+ val fkaliasattr = sql.RelVarAttr(oRelVar, targetCK(0)) // !! (0)
+ val state_t = R2RState(state_subjJoin.joins + sql.InnerJoin(sql.AliasedResource(targetRel,oRelVar), None),
state_subjJoin.varmap,
state_subjJoin.exprs + sql.RelationalExpressionEq(sql.PrimaryExpressionAttr(fkaliasattr),
sql.PrimaryExpressionAttr(objattr)))
//println("enforceFKs: <code>"+s+" "+p+" "+o+"</code> where "+rel+"."+attr+" is a foreign key to "+fkrel+"."+fkattr+" will join "+fkrel+" AS "+oRelVar+", constrain "+fkaliasattr+"="+objattr+" and bind targetattr:=" + fkaliasattr + ". targetrel:=" + fkrel + " (instead of " + objattr + ", " + rel + ").")
- (fkaliasattr, fkrel, fkdt, state_t)
+ (fkaliasattr, targetRel, fkdt, state_t)
} else {
/**
* We're not enforcing foreign keys, so just bind
--- a/sql/src/main/scala/SQL.scala Mon Feb 21 14:11:37 2011 -0500
+++ b/sql/src/main/scala/SQL.scala Sun Mar 13 16:47:10 2011 -0400
@@ -168,7 +168,7 @@
sealed abstract class KeyDeclaration extends FieldDescOrKeyDeclaration
case class PrimaryKeyDeclaration(key:RDB.CandidateKey) extends KeyDeclaration
case class CandidateKeyDeclaration(key:RDB.CandidateKey) extends KeyDeclaration
-case class ForeignKeyDeclaration(fk:RDB.ForeignKeyKey, rel:RDB.RelName, pk:RDB.CandidateKey) extends KeyDeclaration
+case class ForeignKeyDeclaration(fk:List[RDB.AttrName], rel:RDB.RelName, pk:RDB.CandidateKey) extends KeyDeclaration
case class View(rel:RDB.RelName, defn:SelectORUnion) { // sibling of RDB.Relation
override def toString = "CREATE VIEW " + rel + " AS\n" + defn
}
@@ -196,7 +196,7 @@
case l~x => RDB.Database(l.foldLeft(Map[RDB.RelName, RDB.Relation]())((m, p) => {
p match {
case Create(rn:RDB.RelName, relation:RDB.Relation) => m + (rn -> relation)
- case Insert(rn:RDB.RelName, tuple:RDB.Tuple) => m + (rn -> (m(rn) ++ tuple)) // add the tuple
+ case Insert(rn:RDB.RelName, tuple:RDB.Tuple) => m + (rn -> (m(rn) + tuple)) // add the tuple
}
}))
}
@@ -213,14 +213,13 @@
val pk0:Option[RDB.CandidateKey] = None
val attrs0 = Map[RDB.AttrName, RDB.Datatype]()
val candidates0 = List[RDB.CandidateKey]()
- val fks0 = Map[RDB.ForeignKeyKey, RDB.Target]()
+ val fks0 = Set[RDB.ForeignKey]()
/* <pk>: (most recently parsed) PRIMARY KEY
* <attrs>: map of attribute to type (e.g. INTEGER)
* <fks>: map holding FOREIGN KEY relation REFERENCES attr
*/
val (pk, attrs, candidates, fks) =
- reldesc.foldLeft((pk0, attrs0, candidates0, fks0))((p, rd) => {
- val (pkopt, attrs, candidates, fks) = p
+ reldesc.foldLeft((pk0, attrs0, candidates0, fks0)) { case ((pkopt, attrs, candidates, fks), rd) => {
rd match {
case FieldDesc(attr, value, pkness) => {
val (pkNew, candNew) =
@@ -230,14 +229,14 @@
}
case PrimaryKeyDeclaration(key) =>
// @@ why doesn't [[ candidates + RDB.CandidateKey(attr.n) ]] work?
- (Some(key), attrs, candidates ++ List(RDB.CandidateKey(key map {attr => RDB.AttrName(attr.n)})), fks)
+ (Some(key), attrs, candidates :+ key, fks)
case CandidateKeyDeclaration(key) =>
// @@ why doesn't [[ candidates + RDB.CandidateKey(attr.n) ]] work?
- (pkopt, attrs, candidates ++ List(RDB.CandidateKey(key map {attr => RDB.AttrName(attr.n)})), fks)
+ (pkopt, attrs, candidates :+ key, fks)
case ForeignKeyDeclaration(fk, rel, pk) =>
- (pkopt, attrs, candidates, fks + (fk -> RDB.Target(rel, pk)))
+ (pkopt, attrs, candidates, fks + RDB.ForeignKey(fk, RDB.Target(rel, pk)))
}
- })
+ } }
val rd = RDB.Relation(relation, RDB.Header(attrs), List(), candidates, pk, RDB.ForeignKeys(fks))
Create(relation, rd)
}
@@ -262,8 +261,7 @@
| "(?i)UNIQUE".r ~ "(" ~ rep1sep(attribute, ",") ~ ")" ^^
{ case _~"("~attributes~")" => CandidateKeyDeclaration(RDB.CandidateKey(attributes)) }
| "(?i)FOREIGN".r ~ "(?i)KEY".r ~ "(" ~ rep1sep(attribute, ",") ~ ")" ~ "(?i)REFERENCES".r ~ relation ~ "(" ~ rep1sep(attribute, ",") ~ ")" ^^
- { case _~_~"("~fk~")"~_~relation~"("~pk~")" => ForeignKeyDeclaration(RDB.ForeignKeyKey(fk), relation, RDB.CandidateKey(pk)) }
- )
+ { case _~_~"("~fk~")"~_~relation~"("~pk~")" => ForeignKeyDeclaration(fk, relation, RDB.CandidateKey(pk)) } )
def typpe:Parser[RDB.Datatype] = (
"(?i)INTEGER".r ~ opt(size)^^ { case _ => RDB.Datatype.INTEGER }