+ commented TableDisjunction
author Eric Prud'hommeaux <eric@w3.org>
Thu, 20 May 2010 11:55:05 +0200
changeset 196 df3c379243a2
parent 195 7cdd51c3b816
child 197 88b71c4aa8d8
+ commented TableDisjunction
src/main/scala/SparqlToSql.scala
--- a/src/main/scala/SparqlToSql.scala	Wed May 19 21:57:02 2010 +0200
+++ b/src/main/scala/SparqlToSql.scala	Thu May 20 11:55:05 2010 +0200
@@ -722,13 +722,24 @@
      */
     val nestedState = mapGraphPattern(db, emptyState, gp, enforceForeignKeys)
     val nestedVars = gp.findVars
-    /**  */
-    val joinNo = sql.NamedAttribute(sql.PrimaryExpressionTyped(sql.Datatype.INTEGER,sql.Name("" + initState.joins.size)),
+    /**
+     * Select a constant as _DISJOINT_ so later constraints can be
+     * sensitive to whether a variable was bound.
+     * This matters for asymmetric UNIONs and, here, OPTIONALs. Given:
+     *   Join( LeftJoin( BGP(),
+     *                   BGP( ?x :p2 ?v2 ) ),
+     *         BGP( ?y :p3 ?v2 ) )
+     * coreference constraints against ?v2 should only be enforced for
+     * tuples from the right side of this LeftJoin.
+     */
+    val pathNo = sql.NamedAttribute(sql.PrimaryExpressionTyped(sql.Datatype.INTEGER,sql.Name("" + initState.joins.size)),
 					sql.AttrAlias(sql.Name("_DISJOINT_")))
-    val leftJoinVars = gp.findVars.toList
-    val attrlist:Set[sql.NamedAttribute] = leftJoinVars.foldLeft(Set(joinNo))((attrs, v) =>
-      attrs ++ Set(sql.NamedAttribute(varToAttribute(nestedState.varmap, sparql.VarAssignable(v)), sql.AttrAlias(attrAliasNameFromVar(sparql.VarAssignable(v)))))
-      										)
+    val leftJoinVars = gp.findVars
+    val attrlist:Set[sql.NamedAttribute] = leftJoinVars.map(
+      v =>
+	sql.NamedAttribute(varToAttribute(nestedState.varmap, sparql.VarAssignable(v)),
+			   sql.AttrAlias(attrAliasNameFromVar(sparql.VarAssignable(v))))
+    ) + pathNo // add join number to selections
     val subselect = sql.Select(
       sql.AttributeList(attrlist),
       sql.TableList(nestedState.joins),
@@ -745,8 +756,8 @@
     val nestedCond = sql.RelationalExpressionNull(sql.PrimaryExpressionAttr(
       sql.RelVarAttr(leftJoinAlias, sql.Attribute(sql.Name("_DISJOINT_")))))
 
-    /** Bind variables to the attributes projected from the subselect; handle
-     * corefs (equivalence with earlier bindings).
+    /** Bind variables to the attributes projected from the subselect;
+     * handle corefs (equivalence with earlier bindings).
      */
     val outerState2 =
       nestedVars.foldLeft(
@@ -765,7 +776,7 @@
 	   case 0 =>
 	     sql.RelationalExpressionEq(sql.PrimaryExpressionTyped(sql.Datatype.INTEGER,sql.Name("1")),
 					sql.PrimaryExpressionTyped(sql.Datatype.INTEGER,sql.Name("1")))
-	     // /* Require corefs unless we have a leading OPTIONAL. */
+	     /** Require corefs unless we have a leading OPTIONAL. */
 	     // if (...)
 	     // else
 	     //   error ("Nested GP has no variables shared with its context; cowaredly refusing to join ON 1.")
@@ -796,7 +807,8 @@
 
       /** <a href="http://www.w3.org/TR/rdf-sparql-query/#defn_PatternInstanceMapping">Basic Graph Pattern Matching</a>()
        * @param triplepatterns  set of triple patterns. Premise: all triple patterns must resolve against the direct graph.
-       * As { TP1, TP2 } === Join({ TP1 }, { TP2 }), we triple patterns and conjunctions all contribute to the same set of SQL joins, variable bindings and constraints.
+       * As { TP1, TP2 } == Join({ TP1 }, { TP2 }), we can view the bindOnPredicate function as partitioning triple patterns by the relvar they match.
+       * This isn't observable in the SQL query as all the triple patterns in all the conjunctions contribute to the same query.
        */
       case sparql.TriplesBlock(triplepatterns) => {
 	/** Examine each triple, updating the compilation state. */
@@ -818,10 +830,10 @@
 
       /** <a href="http://www.w3.org/TR/rdf-sparql-query/#defn_evalJoin">Join</a>(P1, P2)
        * Since Join is association, we handle this as an n-ary join.
-       * @param list  list of graph patterns to join.
+       * @param conjoints  list of graph patterns to join.
        */
-      case sparql.TableConjunction(list) => {
-	list.foldLeft(state)((incState,s) => mapGraphPattern(db, incState, s, enforceForeignKeys))
+      case sparql.TableConjunction(conjoints) => {
+	conjoints.foldLeft(state)((incState,s) => mapGraphPattern(db, incState, s, enforceForeignKeys))
       }
 
       /** <a href="http://www.w3.org/TR/rdf-sparql-query/#defn_evalLeftJoin">LeftJoin</a>(P1, P2, F)
@@ -836,7 +848,7 @@
 	  if (state.joins.size == 0)
 	    /**
 	     * Leading optionals (ASK WHERE { OPTIONAL { ... } ... }) in SPARQL don't have a counterpart in SQL.
-	     * We emulate leading optionals with a leading SQL table which projects one solution with no selected attributes.
+	     * We emulate leading optionals with a leading SQL subselect which projects one solution with no selected attributes.
 	     */
 	    R2RState(state.joins + sql.InnerJoin(sql.AliasedResource(sql.Subselect(
 	      sql.Select(
@@ -867,11 +879,12 @@
       }
 
       /** <a href="http://www.w3.org/TR/rdf-sparql-query/#defn_evalUnion">Union</a>(P1, P2)
-       * Since Disjunction is association, we handle this as an n-ary disjunction.
+       * Since Disjunction is associative, we handle this as an n-ary disjunction.
        * @param disjoinits  list of graph patterns to concatenate.
        */
       case sparql.TableDisjunction(disjoints) => {
-	/** SPARQL UNIONs are realized as SQL subselects.
+	/**
+	 * SPARQL UNIONs are realized as SQL subselects.
 	 * Set up initial state for this subselect.
 	 */
 	val unionAlias = sql.RelVar(sql.Name("G_union" + state.joins.size)) // invent a unique name for this union.
@@ -883,17 +896,28 @@
 	val unionVars = disjoints.foldLeft(Set[sparql.Var]())((mySet,disjoint) =>
 	  mySet ++ disjoint.findVars).toList // all variables nested in the disjoints.
 
-	/** Map the disjoints to subselects.
-	 * <no> is used for uniquely naming flags in the SELECTs used to
-	 * indicate which disjoint produced a tuple.
+	/**
+	 * Map the disjoints to subselects.
+	 * Non-Functional var <code>number</code> is used for projecting unique
+	 * constants to indicate which disjoint produced a tuple.
 	 */
-	val (subselects, _) = disjoints.foldLeft((Set[sql.Select](), 0))((incPair,disjoint) => {
-	  val (subselects, no) = incPair
+	var number = 0
+	val subselects = disjoints.foldLeft(Set[sql.Select]())((subselects, disjoint) => {
 	  val disjointState = mapGraphPattern(db, emptyState, disjoint, enforceForeignKeys)
-	  val disjointNo = sql.NamedAttribute(sql.PrimaryExpressionTyped(sql.Datatype.INTEGER,sql.Name("" + no)),
-					      sql.AttrAlias(sql.Name("_DISJOINT_")))
+	  /**
+	   * Select a constant as _DISJOINT_ so later constraints can be
+	   * sensitive to whether a variable was bound.
+	   * This matters for OPTIONALs and, here, asymmetric UNIONs. Given:
+	   *   Join( Union( BGP( ?x :p1 ?v1 ),
+	   *                BGP( ?x :p2 ?v1 . ?x :p2 ?v2 ) ),
+	   *         BGP( ?y :p3 ?v2 ) )
+	   * coreference constraints against ?v2 should only be enforced for
+	   * tuples from the right side of this union.
+	   */
+	  val pathNo = sql.NamedAttribute(sql.PrimaryExpressionTyped(sql.Datatype.INTEGER,sql.Name("" + number)),
+					  sql.AttrAlias(sql.Name("_DISJOINT_")))
 
-	  val attrlist:Set[sql.NamedAttribute] = unionVars.foldLeft(Set(disjointNo))((attrs, v) => {
+	  val attrlist:Set[sql.NamedAttribute] = unionVars.foldLeft(Set(pathNo))((attrs, v) => {
 	    val attrOrNull = if (disjointState.varmap.contains(sparql.VarAssignable(v))) varToAttribute(disjointState.varmap, sparql.VarAssignable(v)) else sql.ConstNULL()
 	    attrs ++ Set(sql.NamedAttribute(attrOrNull, sql.AttrAlias(attrAliasNameFromVar(sparql.VarAssignable(v)))))
 	  })
@@ -907,28 +931,28 @@
 	      case _ => Some(sql.ExprConjunction(disjointState.exprs))
 	    }
 	  )
-	  (subselects + subselect, no+1)
+	  number = number + 1 // non-functional, but clearer than wrapping as a parameter in a foldLeft
+	  subselects + subselect
 	})
 
-	/* Bind variables to the attributes projected from the subselect; handle
-	 * corefs (equivalence with earlier bindings).
-	 * <no> is used for uniquely naming flags in the SELECTs used to
-	 * indicate which disjoint produced a tuple.
-	 * <state2> will have no additional tables in the TableList.
+	/**
+	 * Connect the variables projected from the nested selects into the outer variable bindings and constraints.
+	 * <code>state2</code> will have no additional tables in the TableList.
+	 * <code>number</code> is again used for projecting unique
+	 * constants to indicate which disjoint produced a tuple.
 	 */
-	val (state2, _) = disjoints.foldLeft((state, 0))((incPair,disjoint) => {
-	  val (outerState, no) = incPair
+	number = 0
+	val state2 = disjoints.foldLeft(state)((outerState, disjoint) => {
 	  val disjointState = mapGraphPattern(db, emptyState, disjoint, enforceForeignKeys)
 	  val disjointVars = disjoint.findVars
 
-	  /* Create a condition to test if this OPTIONAL was matched (called
-	   * _DISJOINT_ as OPTIONAL behaves pretty much like a disjunction).
-	   */
+	  /** Create a condition to test if this disjoint was matched. */
 	  val disjointCond = sql.RelationalExpressionNe(sql.PrimaryExpressionAttr(sql.RelVarAttr(unionAlias, sql.Attribute(sql.Name("_DISJOINT_")))),
-							sql.PrimaryExpressionTyped(sql.Datatype.INTEGER,sql.Name("" + no)))
+							sql.PrimaryExpressionTyped(sql.Datatype.INTEGER,sql.Name("" + number)))
 	  val outerState2 = disjointVars.foldLeft(outerState)((myState, v) =>
 	      subselectVars(myState, sparql.VarAssignable(v), unionAlias, disjointCond, outerState.varmap, disjointState.varmap, false))
-	  (outerState2, no+1)
+	  number = number + 1 // non-functional, but clearer than wrapping as a parameter in a foldLeft
+	  outerState2
 	})
 	val subselect = sql.Subselect(sql.Union(subselects))
 	R2RState(state.joins + sql.InnerJoin(sql.AliasedResource(subselect,unionAlias), None), state2.varmap, state2.exprs)