- *some* spurious NOT NULLs
author Eric Prud'hommeaux <eric@w3.org>
Fri, 08 Jan 2010 19:31:52 -0500
changeset 134 f661bcdbb2e1
parent 133 e3d5b1289b8c
child 135 434a54e4588c
- *some* spurious NOT NULLs
src/main/scala/RDB2RDFMain.scala
src/test/scala/RDB2RDFTest.scala
--- a/src/main/scala/RDB2RDFMain.scala	Fri Jan 08 08:27:15 2010 -0500
+++ b/src/main/scala/RDB2RDFMain.scala	Fri Jan 08 19:31:52 2010 -0500
@@ -248,6 +248,22 @@
     }
   }
 
+  def findNonNullRelAliasAttrs(expr:sql.Expression):Set[sql.RelAliasAttribute] = { // Collects the RelAliasAttributes referenced via PrimaryExpressionAttr inside expr; the TriplesBlock case omits IS NOT NULL guards for variables mapped to these attributes.
+    expr match { // NOTE(review): no fallback case; any sql.Expression form not listed below would raise a MatchError -- confirm the list is complete
+      case sql.ExprConjunction(l) => l.foldLeft(Set[sql.RelAliasAttribute]())((s, e) => s ++ findNonNullRelAliasAttrs(e)) // conjunction: union of the attributes found in each conjunct
+      case sql.ExprDisjunction(l) => l.foldLeft(Set[sql.RelAliasAttribute]())((s, e) => s ++ findNonNullRelAliasAttrs(e)) // disjunction: also a union over all branches. NOTE(review): an attribute seen in only one OR branch is reported too -- confirm that is intended
+      case sql.RelationalExpressionEq(l, r) => findNonNullRelAliasAttrs(l) ++ findNonNullRelAliasAttrs(r) // "=": attributes from both operands
+      case sql.RelationalExpressionNe(l, r) => findNonNullRelAliasAttrs(l) ++ findNonNullRelAliasAttrs(r) // "!=": attributes from both operands
+      case sql.RelationalExpressionLt(l, r) => findNonNullRelAliasAttrs(l) ++ findNonNullRelAliasAttrs(r) // "<": attributes from both operands
+      case e:sql.PrimaryExpressionTyped => Set() // typed primary expression: contributes nothing
+      case sql.PrimaryExpressionAttr(a) => Set(a) // attribute reference: the only case that yields an attribute
+      case e:sql.ConstNULL => Set() // NULL constant: contributes nothing
+      case e:sql.Concat => Set() // Concat: its arguments are not inspected
+      case sql.RelationalExpressionNull(a) => findNonNullRelAliasAttrs(a) // IS NULL: forwards the operand's attributes. NOTE(review): the function name says "non-null" -- confirm IS NULL operands should be reported
+      case sql.RelationalExpressionNotNull(a) => findNonNullRelAliasAttrs(a) // IS NOT NULL: forwards the operand's attributes
+    }
+  }
+
   def varToAttribute(varmap:Map[sparql.Var, SQL2RDFValueMapper], vvar:sparql.Var):sql.RelAliasAttribute = {
     varmap(vvar) match {
       case IntMapper(relalias, disjoints) => relalias
@@ -335,7 +351,9 @@
       case sparql.TriplesBlock(triplepatterns) => {
 	/* Examine each triple, updating the compilation state. */
 	val state2 = triplepatterns.foldLeft(state)((incState,s) => bindOnPredicate(db, incState, s, enforceForeignKeys))
-	val nullExprs = findVars(gp).foldLeft(Set[sql.Expression]())((s, vvar) => {
+	val attrAliases = state2.exprs.foldLeft(Set[sql.RelAliasAttribute]())((s, e) => s ++ findNonNullRelAliasAttrs(e))
+	val needNullGuards = findVars(gp).foldLeft(Set[sparql.Var]())((s, v) => if (attrAliases.contains(varToAttribute(state2.varmap, v))) s else s + v)
+	val nullExprs = needNullGuards.foldLeft(Set[sql.Expression]())((s, vvar) => {
 	  state2.varmap(vvar) match {
 	    case RDFBNoder(relation, relalias, disjoints) => s
 	    case _ => s ++ Set(sql.RelationalExpressionNotNull(sql.PrimaryExpressionAttr(varToAttribute(state2.varmap, vvar))))
--- a/src/test/scala/RDB2RDFTest.scala	Fri Jan 08 08:27:15 2010 -0500
+++ b/src/test/scala/RDB2RDFTest.scala	Fri Jan 08 19:31:52 2010 -0500
@@ -221,7 +221,7 @@
 SELECT R_manager.empid AS manager
        FROM Employee AS R_empid253
             INNER JOIN Employee AS R_manager
- WHERE R_manager.empid=R_empid253.manager AND R_empid253.empid=253 AND R_manager.empid IS NOT NULL
+ WHERE R_manager.empid=R_empid253.manager AND R_empid253.empid=253
 """).get
     val generated = RDB2RDF(db, sparqlSelect, StemURI("http://hr.example/DB/"), true, false)
     assert(generated === parsed)
@@ -280,7 +280,7 @@
        INNER JOIN Employee AS R_who ON R_who.empid=R_task1.employee
        INNER JOIN TaskAssignments AS R_task2 ON R_who.empid=R_task2.employee
  WHERE R_task1.id<R_task2.id 
-   AND R_task1.id IS NOT NULL AND R_task2.id IS NOT NULL AND R_who.empid IS NOT NULL
+   AND R_task1.id IS NOT NULL AND R_task2.id IS NOT NULL
 """).get
     val generated = RDB2RDF(db, sparqlSelect, StemURI("http://hr.example/DB/"), true, false)
     assert(generated === parsed)
@@ -312,7 +312,7 @@
        FROM Employee AS R_emp
             INNER JOIN Employee AS R_manager ON R_manager.empid=R_emp.manager
  WHERE R_emp    .lastName IS NOT NULL AND R_emp    .empid IS NOT NULL
-   AND R_manager.lastName IS NOT NULL AND R_manager.empid IS NOT NULL
+   AND R_manager.lastName IS NOT NULL
 """).get
     val generated = RDB2RDF(db, sparqlSelect, StemURI("http://hr.example/DB/"), true, false)
     assert(generated === parsed)
@@ -401,7 +401,6 @@
        INNER JOIN Employee AS R_manager
 WHERE R_manager.empid=R_emp.manager AND R_manager.lastName="Johnson" AND R_emp.lastName IS NOT NULL
  AND R_emp.empid IS NOT NULL
- AND R_manager.empid IS NOT NULL
 """).get
     val generated = RDB2RDF(db, sparqlSelect, StemURI("http://hr.example/DB/"), true, false)
     assert(generated === parsed)
@@ -447,12 +446,9 @@
        INNER JOIN Tasks AS R_utask ON R_utask.taskid=R_upper.task
        INNER JOIN Employee AS R_grandManager ON R_grandManager.empid=R_utask.lead
  WHERE R_taskLead.birthday<R_emp.birthday AND R_grandManager.birthday<R_taskLead.birthday
-   AND R_grandManager.birthday IS NOT NULL AND R_emp.empid IS NOT NULL
-   AND R_taskLead.empid IS NOT NULL AND R_emp.lastName IS NOT NULL
-   AND R_emp.birthday IS NOT NULL AND R_grandManager.empid IS NOT NULL
-   AND R_grandManager.lastName IS NOT NULL AND R_utask.taskid IS NOT NULL
-   AND R_taskLead.birthday IS NOT NULL AND R_ltask.taskid IS NOT NULL
-   AND R_lower.id IS NOT NULL AND R_upper.id IS NOT NULL
+   AND R_grandManager.birthday IS NOT NULL AND R_emp.lastName IS NOT NULL
+   AND R_emp.birthday IS NOT NULL AND R_grandManager.lastName IS NOT NULL
+   AND R_taskLead.birthday IS NOT NULL AND R_lower.id IS NOT NULL AND R_upper.id IS NOT NULL
 """).get
     val generated = RDB2RDF(db, sparqlSelect, StemURI("http://hr.example/DB/"), true, false)
     assert(generated === parsed)
@@ -496,8 +492,8 @@
            FROM TaskAssignments AS R_above
                 INNER JOIN Tasks AS R_atask ON R_atask.taskid=R_above.task
                 INNER JOIN Employee AS R_taskLead ON R_taskLead.empid=R_atask.lead
-          WHERE R_atask.lead IS NOT NULL AND R_above.employee IS NOT NULL
-            AND R_above.task IS NOT NULL AND R_taskLead.lastName IS NOT NULL
+          WHERE R_above.employee IS NOT NULL
+            AND R_taskLead.lastName IS NOT NULL
             AND R_above.id IS NOT NULL
        UNION
          SELECT 1 AS _DISJOINT_, NULL AS above, NULL AS atask,
@@ -508,8 +504,8 @@
            FROM TaskAssignments AS R_below
                 INNER JOIN Tasks AS R_btask ON R_btask.taskid=R_below.task
                 INNER JOIN Employee AS R_managed ON R_managed.empid=R_below.employee
-          WHERE R_below.employee IS NOT NULL AND R_managed.lastName IS NOT NULL
-            AND R_below.task IS NOT NULL AND R_btask.lead IS NOT NULL
+          WHERE R_managed.lastName IS NOT NULL
+            AND R_btask.lead IS NOT NULL
             AND R_below.id IS NOT NULL
                        ) AS G_union1
  WHERE R_who.lastName="Smith"
@@ -565,8 +561,8 @@
            FROM TaskAssignments AS R_above
                 INNER JOIN Tasks AS R_atask ON R_atask.taskid=R_above.task
                 INNER JOIN Employee AS R_taskLead ON R_taskLead.empid=R_atask.lead
-          WHERE R_above.employee IS NOT NULL AND R_above.id IS NOT NULL AND R_above.task IS NOT NULL
-            AND R_atask.lead IS NOT NULL AND R_taskLead.lastName IS NOT NULL
+          WHERE R_above.employee IS NOT NULL AND R_above.id IS NOT NULL
+            AND R_taskLead.lastName IS NOT NULL
        UNION
          SELECT NULL AS above, NULL AS atask, R_managed.birthday AS bday, R_below.id AS below,
                 R_below.task AS btask, R_below.employee AS managed, R_managed.lastName AS name,
@@ -574,14 +570,14 @@
            FROM TaskAssignments AS R_below
                 INNER JOIN Tasks AS R_btask ON R_btask.taskid=R_below.task
                 INNER JOIN Employee AS R_managed ON R_managed.empid=R_below.employee
-          WHERE R_below.employee IS NOT NULL AND R_below.id IS NOT NULL AND R_below.task IS NOT NULL
+          WHERE R_below.id IS NOT NULL
             AND R_btask.lead IS NOT NULL AND R_managed.birthday IS NOT NULL AND R_managed.lastName IS NOT NULL
                   ) AS G_union0
        INNER JOIN Employee AS R_who
  WHERE (G_union0._DISJOINT_!=0 OR R_who.empid=G_union0.who)
    AND (G_union0._DISJOINT_!=1 OR R_who.birthday=G_union0.bday)
    AND (G_union0._DISJOINT_!=1 OR R_who.empid=G_union0.who)
-   AND G_union0.bday IS NOT NULL AND G_union0.who IS NOT NULL AND R_who.lastName="Smith"
+   AND R_who.lastName="Smith"
 """).get
     val generated = RDB2RDF(db, sparqlSelect, StemURI("http://hr.example/DB/"), false, false)
     assert(generated === parsed)
@@ -623,8 +619,8 @@
            FROM TaskAssignments AS R_above
                 INNER JOIN Tasks AS R_atask ON R_atask.taskid=R_above.task
                 INNER JOIN Employee AS R_taskLead ON R_taskLead.empid=R_atask.lead
-          WHERE R_above.employee IS NOT NULL AND R_above.id IS NOT NULL AND R_above.task IS NOT NULL
-            AND R_atask.lead IS NOT NULL AND R_taskLead.lastName IS NOT NULL
+          WHERE R_above.employee IS NOT NULL AND R_above.id IS NOT NULL
+            AND R_taskLead.lastName IS NOT NULL
        UNION
          SELECT NULL AS above, NULL AS atask, R_managed.birthday AS bday, R_below.id AS below,
                 R_below.task AS btask, R_below.employee AS managed, R_managed.lastName AS name,
@@ -632,7 +628,7 @@
            FROM TaskAssignments AS R_below
                 INNER JOIN Tasks AS R_btask ON R_btask.taskid=R_below.task
                 INNER JOIN Employee AS R_managed ON R_managed.empid=R_below.employee
-          WHERE R_below.employee IS NOT NULL AND R_below.id IS NOT NULL AND R_below.task IS NOT NULL
+          WHERE R_below.id IS NOT NULL
             AND R_btask.lead IS NOT NULL AND R_managed.birthday IS NOT NULL AND R_managed.lastName IS NOT NULL
                   ) AS G_union1
  WHERE (G_union1._DISJOINT_!=0 OR G_union1.who=R_who.empid)
@@ -680,8 +676,8 @@
            FROM TaskAssignments AS R_above
                 INNER JOIN Tasks AS R_atask ON R_atask.taskid=R_above.task
                 INNER JOIN Employee AS R_taskLead ON R_taskLead.empid=R_atask.lead
-          WHERE R_above.employee IS NOT NULL AND R_above.id IS NOT NULL AND R_above.task IS NOT NULL
-            AND R_atask.lead IS NOT NULL AND R_taskLead.lastName IS NOT NULL
+          WHERE R_above.employee IS NOT NULL AND R_above.id IS NOT NULL
+            AND R_taskLead.lastName IS NOT NULL
        UNION
          SELECT NULL AS above, NULL AS atask, R_managed.birthday AS bday, R_below.id AS below,
                 R_below.task AS btask, R_below.employee AS managed, R_managed.lastName AS name,
@@ -689,13 +685,13 @@
            FROM TaskAssignments AS R_below
                 INNER JOIN Tasks AS R_btask ON R_btask.taskid=R_below.task
                 INNER JOIN Employee AS R_managed ON R_managed.empid=R_below.employee
-          WHERE R_below.employee IS NOT NULL AND R_below.id IS NOT NULL AND R_below.task IS NOT NULL
+          WHERE R_below.id IS NOT NULL
             AND R_btask.lead IS NOT NULL AND R_managed.birthday IS NOT NULL AND R_managed.lastName IS NOT NULL
                   ) AS G_union1
  WHERE (G_union1._DISJOINT_!=0 OR G_union1.who=R_who.empid)
    AND (G_union1._DISJOINT_!=1 OR G_union1.who=R_who.empid)
    AND (G_union1._DISJOINT_!=1 OR R_who.birthday=G_union1.bday)
-   AND G_union1.bday IS NOT NULL AND R_who.empid IS NOT NULL AND R_who.lastName="Smith"
+   AND R_who.empid IS NOT NULL AND R_who.lastName="Smith"
 """).get
     val generated = RDB2RDF(db, sparqlSelect, StemURI("http://hr.example/DB/"), false, false)
     assert(generated === parsed)
@@ -724,8 +720,7 @@
          FROM Employee AS R_emp
               INNER JOIN Employee AS R_manager ON R_manager.empid=R_emp.manager
               INNER JOIN Employee AS R_grandManager ON R_grandManager.empid=R_manager.manager
-        WHERE R_emp.empid IS NOT NULL AND R_emp.manager IS NOT NULL AND R_grandManager.lastName IS NOT NULL
-          AND R_manager.lastName IS NOT NULL AND R_manager.manager IS NOT NULL
+        WHERE R_emp.empid IS NOT NULL AND R_grandManager.lastName IS NOT NULL AND R_manager.lastName IS NOT NULL
                   ) AS G_opt1 ON G_opt1.emp=R_emp.empid
  WHERE R_emp.empid IS NOT NULL AND R_emp.lastName IS NOT NULL
 """).get
@@ -754,12 +749,11 @@
        SELECT R_taskLead.manager AS emp, R_grandManager.lastName AS grandManagName, R_taskLead.manager AS grandManager, R_taskLead.empid AS taskLead, 1 AS _DISJOINT_
          FROM Employee AS R_taskLead
               INNER JOIN Employee AS R_grandManager ON R_grandManager.empid=R_taskLead.manager
-        WHERE R_grandManager.lastName IS NOT NULL AND R_taskLead.empid IS NOT NULL AND R_taskLead.manager IS NOT NULL
+        WHERE R_grandManager.lastName IS NOT NULL AND R_taskLead.empid IS NOT NULL
                   ) AS G_opt1 ON 1=1
        INNER JOIN Employee AS R_emp
  WHERE (G_opt1._DISJOINT_ IS NULL OR R_emp.empid=G_opt1.emp)
    AND R_emp.lastName IS NOT NULL
-   AND G_opt1.emp IS NOT NULL
 """).get
     val generated = RDB2RDF(db, sparqlSelect, StemURI("http://hr.example/DB/"), false, false)
     assert(generated === parsed)
@@ -792,10 +786,9 @@
               SELECT R_grandManager.lastName AS grandManagName, R_manager.manager AS grandManager, R_manager.empid AS manager, 2 AS _DISJOINT_
                 FROM Employee AS R_manager
                      INNER JOIN Employee AS R_grandManager ON R_grandManager.empid=R_manager.manager
-               WHERE R_grandManager.lastName IS NOT NULL AND R_manager.empid IS NOT NULL AND R_manager.manager IS NOT NULL
+               WHERE R_grandManager.lastName IS NOT NULL AND R_manager.empid IS NOT NULL
                          ) AS G_opt2 ON G_opt2.manager=R_emp.manager
         WHERE R_emp.empid IS NOT NULL
-          AND R_emp.manager IS NOT NULL
           AND R_manager.lastName IS NOT NULL
                   ) AS G_opt1 ON G_opt1.emp=R_emp.empid
  WHERE R_emp.empid IS NOT NULL AND R_emp.lastName IS NOT NULL
@@ -844,7 +837,7 @@
    AND R_emp1.empid IS NOT NULL AND R_emp1.lastName IS NOT NULL AND R_emp1.lastName<R_emp2.lastName
    AND R_emp2.empid IS NOT NULL AND R_emp2.lastName IS NOT NULL AND R_emp2.lastName<R_emp3.lastName
    AND R_emp3.empid IS NOT NULL AND R_emp3.lastName IS NOT NULL AND R_emp3.lastName<R_emp4.lastName
-   AND R_emp4.empid IS NOT NULL AND R_emp4.lastName IS NOT NULL AND G_opt1.birthday IS NOT NULL
+   AND R_emp4.empid IS NOT NULL AND R_emp4.lastName IS NOT NULL
 """).get
     val generated = RDB2RDF(db, sparqlSelect, StemURI("http://hr.example/DB/"), false, false)
     assert(generated === parsed)