~ progress on UNION
author Eric Prud'hommeaux <bertails@w3.org>
Wed, 30 Dec 2009 09:42:53 -0500
changeset 86 ee54401b7e2d
parent 85 768db607cf26
child 87 98edfae72eae
~ progress on UNION
src/main/scala/RDB2RDFMain.scala
src/test/scala/RDB2RDFTest.scala
--- a/src/main/scala/RDB2RDFMain.scala	Tue Dec 29 17:35:25 2009 -0500
+++ b/src/main/scala/RDB2RDFMain.scala	Wed Dec 30 09:42:53 2009 -0500
@@ -50,11 +50,11 @@
     RelAlias(Name("R_" + v))
   }
 
-  def uriConstraint(state:R2RState, constrainMe:RelAliasAttribute, u:ObjUri):R2RState = {
+  def uriConstraint(state:R2RState, constrainMe:RelAliasAttribute, u:ObjUri, enforeForeignKeys:Boolean):R2RState = {
     // println("equiv+= " + toString(constrainMe) + "=" + value)
     //R2RState(state.joins, state.varmap, state.exprs + PrimaryExpressionEq(constrainMe,RValueTyped(SQLDatatype.INTEGER,Name(u.v.s))))
-    val attr = Attribute(Name(u.attr.s))
-    val relvar = RelAliasAttribute(constrainMe.relalias, attr)
+    val relvar = if (enforeForeignKeys) RelAliasAttribute(constrainMe.relalias, Attribute(Name(u.attr.s))) else { println("constraining to " + constrainMe)
+constrainMe }
     R2RState(state.joins, state.varmap, state.exprs + PrimaryExpressionEq(relvar,RValueTyped(SQLDatatype.INTEGER,Name(u.v.s))))
   }
 
@@ -149,32 +149,34 @@
 	val subjattr = RelAliasAttribute(relalias, pk.attr)
 	val objattr = RelAliasAttribute(relalias, attr)
 	state = s match {
-	  case SUri(u) => uriConstraint(state, subjattr, u)
+	  case SUri(u) => uriConstraint(state, subjattr, u, true)
 	  case SVar(v) => varConstraint(state, subjattr, v, db, rel)
 	}
 	state = R2RState(state.joins + AliasedResource(rel,relalias), state.varmap, state.exprs)
 
 	val (targetattr, targetrel, dt) = db.relationdescs(rel).attributes(attr) match {
 	  case ForeignKey(fkrel, fkattr) => {
-	    val oRelAlias = relAliasFromO(o)
-	    val fkaliasattr = RelAliasAttribute(oRelAlias, fkattr)
-	    state = R2RState(state.joins, state.varmap, state.exprs + PrimaryExpressionEq(fkaliasattr,RValueAttr(objattr)))
-	    if (enforeForeignKeys)
-	      state = R2RState(state.joins + AliasedResource(fkrel,oRelAlias), state.varmap, state.exprs)
-
 	    var fkdt = db.relationdescs(fkrel).attributes(fkattr) match {
 	      case ForeignKey(dfkrel, dfkattr) => error("foreign key " + rel.n + "." + attr.n + 
 							"->" + fkrel.n + "." + fkattr.n + 
 							"->" + dfkrel.n + "." + dfkattr.n)
 	      case Value(x) => x
 	    }
-	    (fkaliasattr, fkrel, fkdt)
+	    if (enforeForeignKeys) {
+	      val oRelAlias = relAliasFromO(o)
+	      val fkaliasattr = RelAliasAttribute(oRelAlias, fkattr)
+	      state = R2RState(state.joins + AliasedResource(fkrel,oRelAlias), state.varmap, state.exprs + PrimaryExpressionEq(fkaliasattr,RValueAttr(objattr)))
+
+	      (fkaliasattr, fkrel, fkdt)
+	    } else {
+	      (objattr, rel, fkdt)
+	    }
 	  }
 	  case Value(dt) => (objattr, rel, dt)
 	}
 	state = o match {
 	  case OLit(l) => literalConstraint(state, targetattr, l, dt)
-	  case OUri(u) => uriConstraint    (state, targetattr, u)
+	  case OUri(u) => uriConstraint    (state, targetattr, u, enforeForeignKeys)
 	  case OVar(v) => varConstraint    (state, targetattr, v, db, targetrel)
 	}
       }
@@ -270,7 +272,7 @@
 	var state2 = state
 
 	/* Examine each triple, updating the compilation state. */
-	triplepatterns.foreach(s => state2 = bindOnPredicate(db, state2, s, pk, true))
+	triplepatterns.foreach(s => state2 = bindOnPredicate(db, state2, s, pk, enforeForeignKeys))
 
 	// val allVars:Set[Var] = triples.triplepatterns.foldLeft(Set[Var]())((x, y) => x ++ findVars(y))
 	val allVars:Set[Var] = findVars(gp)
@@ -279,11 +281,64 @@
 
 	R2RState(state2.joins, state2.varmap, state2.exprs ++ nullExprs)
       }
+      case TableConjunction(list) => {
+	list.foldLeft(state)((incState,s) => mapGraphPattern(db, incState, s, pk, enforeForeignKeys))
+      }
+      case TableDisjunction(list) => {
+	val unionAlias = RelAlias(Name("R_union1"))
+	var initDisjoints:Set[Select] = Set()
+	val emptyState = R2RState(
+	  Set[AliasedResource](), 
+	  Map[Var, SQL2RDFValueMapper](), 
+	  Set[PrimaryExpression]()
+	)
+	val (state2, disjoints) = list.foldLeft((state, initDisjoints))((incPair,disjoint) => {
+	  val (outerState, outerDisjoints) = incPair
+	  val disjointState = mapGraphPattern(db, emptyState, disjoint, pk, enforeForeignKeys)
+	  val disjointVars = findVars(disjoint)
+
+	  val attrlist:Set[NamedAttribute] = disjointVars.foldLeft(Set[NamedAttribute]())((attrs, v) => 
+	    attrs ++ Set(NamedAttribute(varToAttribute(disjointState.varmap, v), AttrAlias(Name("A_" + v.s)))))
+
+	  val sel = Select(
+	    AttributeList(attrlist),
+	    TableList(disjointState.joins),
+	    Expression(disjointState.exprs)
+	  )
+	  println("sel: " + sel)
+	  println("outerState: " + outerState)
+	  val outerState2 = disjointVars.foldLeft(outerState)((myState, v) => {
+	    val unionAliasAttr = RelAliasAttribute(unionAlias, varToAttribute(disjointState.varmap, v).attribute)
+	    println("examining " + v)
+	    if (myState.varmap.contains(v)) {
+	      /* The variable has already been bound. */
+	      if (varToAttribute(myState.varmap, v) == unionAliasAttr)
+		/* Same var was bound in an earlier disjoint. */
+		myState
+	      else
+		/* Constraint against the initial binding for this variable. */
+		R2RState(myState.joins, myState.varmap, myState.exprs + PrimaryExpressionEq(varToAttribute(myState.varmap, v), RValueAttr(unionAliasAttr)))
+	    } else {
+	      /* This variable is new to the outer context. */
+	      val mapper:SQL2RDFValueMapper = disjointState.varmap(v) match {
+		case RDFNoder(rel, constrainMe)  => RDFNoder(rel, RelAliasAttribute(unionAlias, constrainMe.attribute))
+		case StringMapper(constrainMe)   => StringMapper(RelAliasAttribute(unionAlias, Attribute(Name("A_" + v.s))))
+		case IntMapper(constrainMe)      => IntMapper(RelAliasAttribute(unionAlias, constrainMe.attribute))
+		case RDFBNoder(rel, constrainMe) => RDFBNoder(rel, RelAliasAttribute(unionAlias, constrainMe.attribute))
+	      }
+	      R2RState(myState.joins, myState.varmap + (v -> mapper), myState.exprs)
+	    }
+	  })
+	  (outerState2, outerDisjoints ++ Set(sel))
+	})
+	val union = Subselect(Union(disjoints))
+	R2RState(state.joins + AliasedResource(union,unionAlias), state2.varmap, state2.exprs)
+      }
       case x => error("no code to handle " + x)
     }
   }
 
-  def apply (db:DatabaseDesc, sparql:SparqlSelect, stem:StemURI, pk:PrimaryKey) : Select = {
+  def apply (db:DatabaseDesc, sparql:SparqlSelect, stem:StemURI, pk:PrimaryKey, enforeForeignKeys:Boolean) : Select = {
     val SparqlSelect(attrs, triples) = sparql
 
     /* Create an object to hold our compilation state. */
@@ -293,21 +348,18 @@
       Set[PrimaryExpression]()
     )
 
-    r2rState = mapGraphPattern(db, r2rState, sparql.gp, pk, true)
+    r2rState = mapGraphPattern(db, r2rState, sparql.gp, pk, enforeForeignKeys)
 
     /* Select the attributes corresponding to the variables
      * in the SPARQL SELECT.  */
-    var attrlist:Set[NamedAttribute] = Set()
-    attrs.attributelist.foreach(vvar => attrlist += 
-      NamedAttribute(varToAttribute(r2rState.varmap, vvar), AttrAlias(Name("A_" + vvar.s)))
-    )
+    val attrlist:Set[NamedAttribute] = attrs.attributelist.foldLeft(Set[NamedAttribute]())((attrs, vvar) => 
+      attrs ++ Set(NamedAttribute(varToAttribute(r2rState.varmap, vvar), AttrAlias(Name("A_" + vvar.s)))))
 
     /* Construct the generated query as an abstract syntax. */
     Select(
       AttributeList(attrlist),
       TableList(r2rState.joins),
       Expression(r2rState.exprs)
-//      Expression(exprWithNull)
     )
   }
 }
--- a/src/test/scala/RDB2RDFTest.scala	Tue Dec 29 17:35:25 2009 -0500
+++ b/src/test/scala/RDB2RDFTest.scala	Wed Dec 30 09:42:53 2009 -0500
@@ -44,7 +44,7 @@
             INNER JOIN Employee AS R_id18
  WHERE R_id18.id=R_emp.manager AND R_id18.id=18 AND R_emp.id IS NOT NULL
 """).get
-    assert(RDB2RDF(db, sparqlSelect, StemURI("http://hr.example/DB/"), PrimaryKey(Attribute(Name("id")))) === sqlSelect)
+    assert(RDB2RDF(db, sparqlSelect, StemURI("http://hr.example/DB/"), PrimaryKey(Attribute(Name("id"))), true) === sqlSelect)
     true
   }
 
@@ -62,7 +62,7 @@
             INNER JOIN Employee AS R_manager
  WHERE R_manager.id=R_id18.manager AND R_id18.id=18 AND R_manager.id IS NOT NULL
 """).get
-    assert(RDB2RDF(db, sparqlSelect, StemURI("http://hr.example/DB/"), PrimaryKey(Attribute(Name("id")))) === sqlSelect)
+    assert(RDB2RDF(db, sparqlSelect, StemURI("http://hr.example/DB/"), PrimaryKey(Attribute(Name("id"))), true) === sqlSelect)
     true
   }
 
@@ -82,7 +82,7 @@
             INNER JOIN Employee AS R_18
  WHERE R_18.id=R_emp.manager AND R_18.id=18 AND R_emp.id IS NOT NULL
 """).get
-    assert(RDB2RDF(db, sparqlSelect, StemURI("http://hr.example/DB/"), PrimaryKey(Attribute(Name("id")))) === sqlSelect)
+    assert(RDB2RDF(db, sparqlSelect, StemURI("http://hr.example/DB/"), PrimaryKey(Attribute(Name("id"))), true) === sqlSelect)
     true
   }
 
@@ -101,7 +101,7 @@
             INNER JOIN Employee AS R_emp2
  WHERE R_emp1.lastName=R_emp2.lastName AND R_emp1.id IS NOT NULL AND R_emp1.lastName IS NOT NULL AND R_emp2.id IS NOT NULL
 """).get
-    assert(RDB2RDF(db, sparqlSelect, StemURI("http://hr.example/DB/"), PrimaryKey(Attribute(Name("id")))) === sqlSelect)
+    assert(RDB2RDF(db, sparqlSelect, StemURI("http://hr.example/DB/"), PrimaryKey(Attribute(Name("id"))), true) === sqlSelect)
     true
   }
 
@@ -123,10 +123,28 @@
  AND R_emp.id IS NOT NULL
  AND R_manager.id IS NOT NULL
 """).get
-    assert(RDB2RDF(db, sparqlSelect, StemURI("http://hr.example/DB/"), PrimaryKey(Attribute(Name("id")))) === sqlSelect)
+    assert(RDB2RDF(db, sparqlSelect, StemURI("http://hr.example/DB/"), PrimaryKey(Attribute(Name("id"))), true) === sqlSelect)
   }
 
-  test("transform tup1") {
+  test("transform tup1 no-enforce") {
+    val sparqlParser = Sparql()
+    val sparqlSelect = sparqlParser.parseAll(sparqlParser.select, """
+SELECT ?empName {
+ ?emp      <http://hr.example/DB/Employee#lastName>   ?empName .
+ ?emp      <http://hr.example/DB/Employee#manager>    <http://hr.example/DB/Employee/id.18#record>
+ }
+""").get
+    val sqlParser = Sql()
+    val sqlSelect = sqlParser.parseAll(sqlParser.select, """
+SELECT R_emp.lastName AS A_empName
+       FROM Employee AS R_emp
+ WHERE R_emp.manager=18 AND R_emp.lastName IS NOT NULL
+ AND R_emp.id IS NOT NULL
+""").get
+    assert(RDB2RDF(db, sparqlSelect, StemURI("http://hr.example/DB/"), PrimaryKey(Attribute(Name("id"))), false) === sqlSelect)
+  }
+
+  test("transform tup1 enforce") {
     val sparqlParser = Sparql()
     val sparqlSelect = sparqlParser.parseAll(sparqlParser.select, """
 SELECT ?empName {
@@ -142,7 +160,7 @@
  WHERE R_id18.id=R_emp.manager AND R_id18.id=18 AND R_emp.lastName IS NOT NULL
  AND R_emp.id IS NOT NULL
 """).get
-    assert(RDB2RDF(db, sparqlSelect, StemURI("http://hr.example/DB/"), PrimaryKey(Attribute(Name("id")))) === sqlSelect)
+    assert(RDB2RDF(db, sparqlSelect, StemURI("http://hr.example/DB/"), PrimaryKey(Attribute(Name("id"))), true) === sqlSelect)
   }
 
 
@@ -164,7 +182,7 @@
  AND R_emp.id IS NOT NULL
  AND R_manager.id IS NOT NULL
 """).get
-    assert(RDB2RDF(db, sparqlSelect, StemURI("http://hr.example/DB/"), PrimaryKey(Attribute(Name("id")))) === sqlSelect)
+    assert(RDB2RDF(db, sparqlSelect, StemURI("http://hr.example/DB/"), PrimaryKey(Attribute(Name("id"))), true) === sqlSelect)
   }
 
   test("transform filter1") {
@@ -201,39 +219,39 @@
  AND R_manager.birthday IS NOT NULL
  AND R_grandManager.birthday IS NOT NULL
 """).get
-    assert(RDB2RDF(db2, sparqlSelect, StemURI("http://hr.example/DB/"), PrimaryKey(Attribute(Name("id")))) === sqlSelect)
+    assert(RDB2RDF(db2, sparqlSelect, StemURI("http://hr.example/DB/"), PrimaryKey(Attribute(Name("id"))), true) === sqlSelect)
   }
 
-//   test("transform disj1") {
-//     val sparqlParser = Sparql()
-//     val sparqlSelect = sparqlParser.parseAll(sparqlParser.select, """
-// SELECT ?name
-//  WHERE { ?who emplP:lastName "Smith"
-//          { ?above   manageP:manages ?who .
-//            ?above   manageP:manager ?manager .
-//            ?manager emplP:lastName  ?name }
-//          UNION
-//          { ?below   manageP:manager ?who .
-//            ?below   manageP:manages ?managed .
-//            ?managed emplP:lastName  ?name } }
-// """).get
-//     val sqlParser = Sql()
-//     val sqlSelect = sqlParser.parseAll(sqlParser.select, """
-// SELECT union1.name
-//   FROM Employee AS who
-//        INNER JOIN (
-//          SELECT manager.lastName AS name, above.manages AS who
-//                 FROM Manage AS above
-//                 INNER JOIN Employee as manager ON above.manager=manager.id
-//           WHERE manager.lastName IS NOT NULL
-//        UNION
-//          SELECT managed.lastName AS name, below.manager AS who
-//                 FROM Manage AS below
-//                 INNER JOIN Employee as managed ON below.manages=managed.id
-//           WHERE managed.lastName IS NOT NULL
-//        ) AS union1 ON union1.who=who.id
-//  WHERE who.lastName="Smith"
-// """).get
-//     assert(RDB2RDF(db2, sparqlSelect, StemURI("http://hr.example/DB/"), PrimaryKey(Attribute(Name("id")))) === sqlSelect)
-//   }
+  test("transform disj1") {
+    val sparqlParser = Sparql()
+    val sparqlSelect = sparqlParser.parseAll(sparqlParser.select, """
+SELECT ?name
+       { ?who <http://hr.example/DB/Employee#lastName> "Smith"^^<http://www.w3.org/2001/XMLSchema#string>
+         { ?above   <http://hr.example/DB/Manage#manages> ?who .
+           ?above   <http://hr.example/DB/Manage#manager> ?manager .
+           ?manager <http://hr.example/DB/Employee#lastName>  ?name }
+         UNION
+         { ?below   <http://hr.example/DB/Manage#manager> ?who .
+           ?below   <http://hr.example/DB/Manage#manages> ?managed .
+           ?managed <http://hr.example/DB/Employee#lastName>  ?name } }
+""").get
+    val sqlParser = Sql()
+    val sqlSelect = sqlParser.parseAll(sqlParser.select, """
+SELECT R_union1.A_name AS A_name
+  FROM Employee AS R_who
+       INNER JOIN (
+         SELECT R_manager.lastName AS A_name, R_above.manages AS A_who
+                FROM Manage AS R_above
+                INNER JOIN Employee AS R_manager
+          WHERE R_above.manager=R_manager.id AND R_manager.lastName IS NOT NULL
+       UNION
+         SELECT R_managed.lastName AS A_name, R_below.manager AS A_who
+                FROM Manage AS R_below
+                INNER JOIN Employee AS R_managed
+          WHERE R_below.manages=R_managed.id AND R_managed.lastName IS NOT NULL
+       ) AS R_union1
+ WHERE R_union1.A_who=R_who.id AND R_who.lastName="Smith"
+""").get
+    assert(RDB2RDF(db2, sparqlSelect, StemURI("http://hr.example/DB/"), PrimaryKey(Attribute(Name("id"))), false) === sqlSelect)
+  }
 }