+ more comments
authorEric Prud'hommeaux <eric@w3.org>
Wed, 19 May 2010 15:23:18 +0200
changeset 193 4e5e879223fe
parent 192 2773c2007113
child 194 9c8dfec36968
+ more comments
~ tweaked order of GPs
src/main/scala/SparqlToSql.scala
--- a/src/main/scala/SparqlToSql.scala	Tue May 18 09:48:46 2010 +0200
+++ b/src/main/scala/SparqlToSql.scala	Wed May 19 15:23:18 2010 +0200
@@ -1,5 +1,7 @@
 /** SparqlToSql: convert SPARQL queries to sound SQL queries.
- * <pre>
+ * <a href="http://www.w3.org/TR/rdf-sparql-query/#sparqlDefinition">SPARQL semantics</a> defines the evaluation of an abstract query on a dataset.
+ * This object maps a SPARQL abstract query over a <a href="@@">direct graph</a> to an SQL abstract query over the constituent relations.
+ * <p/>
  * Please read from the bottom -- i.e. apply calls mapGraphPattern with the root
  * graph pattern. mapGraphPattern handles all the graph pattern types in SPARQL,
  * effectively peforming the Convert Graph Patterns step in SPARQL 1.0 12.2.1
@@ -525,7 +527,7 @@
    * @return         an SQL CONCAT expression
    * @see            VarMap
    */
-  def varToConcat(varmap:Map[sparql.Assignable, SQL2RDFValueMapper], vvar:sparql.Assignable, stem:StemURI):sql.Expression = {
+  def varToExpr(varmap:Map[sparql.Assignable, SQL2RDFValueMapper], vvar:sparql.Assignable, stem:StemURI):sql.Expression = {
     varmap(vvar) match {
       case IntMapper(binding) => sql.PrimaryExpressionAttr(bindingToAttribute(binding))
       case StringMapper(binding) => 
@@ -698,6 +700,11 @@
     }
   }
 
+  /**
+   * Recursively add the joins, variable mappings and constraints for an SQL query implementing a graph pattern.
+   * @param db  database description.
+   * @return a new state including the subquery representing gp in a join.
+   */
   def synthesizeOuterJoin(initState:R2RState, gp:sparql.GraphPattern, negate:Boolean, db:sql.DatabaseDesc, enforceForeignKeys:Boolean):R2RState = {
     /** SPARQL OPTIONALs and UNIONs are treated as SQL subselects.
      * Set up initial state for this subselect.
@@ -771,17 +778,24 @@
     R2RState(initState.joins + join, outerState2.varmap, exprs)
   }
 
+  /**
+   * Recursively add the joins, variable mappings and constraints for an SQL query implementing a graph pattern.
+   * @param db  database description.
+   * @param state  initial set of joins, variable mappings and constraints.
+   * @param gp  the SPARQL GraphPattern to represent as SQL.
+   * @param enforceForeignKeys  if true, SPARQL triple patterns corresponding to foreign keys, e.g. <code>?who :hasParent ?parent</code>, generate a join on the referenced table.
+   * @return  a new set of joins, variable mappings and constraints.
+   * Per <a href="http://www.w3.org/TR/rdf-sparql-query/#sparqlQuery">definition of SPARQL Query</a>, SPARQL Graph Patterns are (Basic Graph Pattern, Join, LeftJoin, Filter, Union, Graph).
+   * mapGraphPattern maps each of these to an SQL abstract query (which can then be serialized as SQL and executed).
+   * 
+   */
   def mapGraphPattern(db:sql.DatabaseDesc, state:R2RState, gp:sparql.GraphPattern, enforceForeignKeys:Boolean):R2RState = {
     gp match {
-      case sparql.TableFilter(gp2:sparql.GraphPattern, expr:sparql.Expression) => {
-	val state2 = mapGraphPattern(db, state, gp2, enforceForeignKeys)
 
-	/* Add constraints for all the FILTERS */
-	val filterExprs:Set[sql.RelationalExpression] =
-	  expr.conjuncts.toSet map ((x:sparql.PrimaryExpression) => filter2expr(state2.varmap, x))
-
-	R2RState(state2.joins, state2.varmap, state2.exprs ++ filterExprs)
-      }
+      /** <a href="http://www.w3.org/TR/rdf-sparql-query/#defn_PatternInstanceMapping">Basic Graph Pattern Matching</a>()
+       * @param triplepatterns  set of triple patterns. Premise: all triple patterns must resolve against the direct graph.
+       * As { TP1, TP2 } === Join({ TP1 }, { TP2 }), triple patterns and conjunctions all contribute to the same set of SQL joins, variable bindings and constraints.
+       */
       case sparql.TriplesBlock(triplepatterns) => {
 	/* Examine each triple, updating the compilation state. */
 	val state2 = triplepatterns.foldLeft(state)((incState,s) => bindOnPredicate(db, incState, s, enforceForeignKeys))
@@ -793,11 +807,58 @@
 	})
 	R2RState(state2.joins, state2.varmap, state2.exprs ++ nullExprs)
       }
+
+      /** <a href="http://www.w3.org/TR/rdf-sparql-query/#defn_evalJoin">Join</a>(P1, P2)
+       * Since Join is associative, we handle this as an n-ary join.
+       * @param list  list of graph patterns to join.
+       */
       case sparql.TableConjunction(list) => {
 	list.foldLeft(state)((incState,s) => mapGraphPattern(db, incState, s, enforceForeignKeys))
       }
+
+      /** <a href="http://www.w3.org/TR/rdf-sparql-query/#defn_evalLeftJoin">LeftJoin</a>(P1, P2, F)
+       * The parser providing the SPARQL abstract query turns LeftJoin(P1, P2, F) into Join(P1, Optional(P2)), or Join(P1, Optional(Filter(P2, F))) if there is a FILTER.
+       * @param gp2  nested graph pattern (Ω in algebra)
+       */
+      case sparql.OptionalGraphPattern(gp2) => {
+	/* state_postLeadingTable: create an initial table if the first conjoint is optional.
+	 * e.g. ... FROM (SELECT 1 AS _EMPTY_) AS _EMPTY_ LEFT OUTER JOIN ...
+	 */
+	val state_postLeadingTable =
+	  if (state.joins.size == 0)
+	    R2RState(state.joins + sql.InnerJoin(sql.AliasedResource(sql.Subselect(
+	      sql.Select(
+		sql.AttributeList(Set(sql.NamedAttribute(sql.PrimaryExpressionTyped(sql.Datatype.INTEGER,sql.Name("1")),
+							 sql.AttrAlias(sql.Name("_EMPTY_"))))),
+		sql.TableList(util.AddOrderedSet()),
+		None
+	      )), sql.RelVar(sql.Name("_EMPTY_"))), None), state.varmap, state.exprs)
+	  else
+	    state
+	synthesizeOuterJoin(state_postLeadingTable, gp2, false, db, enforceForeignKeys)
+      }
+
+      /** <a href="http://www.w3.org/TR/rdf-sparql-query/#defn_algFilter">Filter</a>(expr, Ω)
+       * @param gp2  nested graph pattern (Ω in algebra)
+       * @param expr  boolean SPARQL expression (expr in algebra)
+       */
+      case sparql.TableFilter(gp2:sparql.GraphPattern, expr:sparql.Expression) => {
+	/** Calculate state for gp2. */
+	val state2 = mapGraphPattern(db, state, gp2, enforceForeignKeys)
+
+	/** Add constraints for all the FILTERS */
+	val filterExprs:Set[sql.RelationalExpression] =
+	  expr.conjuncts.toSet map ((x:sparql.PrimaryExpression) => filter2expr(state2.varmap, x))
+
+	R2RState(state2.joins, state2.varmap, state2.exprs ++ filterExprs)
+      }
+
+      /** <a href="http://www.w3.org/TR/rdf-sparql-query/#defn_evalUnion">Union</a>(P1, P2)
+       * Since Disjunction is associative, we handle this as an n-ary disjunction.
+       * @param disjoints  list of graph patterns to concatenate.
+       */
       case sparql.TableDisjunction(disjoints) => {
-	/* SPARQL UNIONs are treated as SQL subselects.
+	/** SPARQL UNIONs are treated as SQL subselects.
 	 * Set up initial state for this subselect.
 	 */
 	val unionAlias = sql.RelVar(sql.Name("G_union" + state.joins.size))
@@ -809,7 +870,7 @@
 	val unionVars = disjoints.foldLeft(Set[sparql.Var]())((mySet,disjoint) =>
 	  mySet ++ disjoint.findVars).toList
 
-	/* Map the disjoints to subselects.
+	/** Map the disjoints to subselects.
 	 * <no> is used for uniquely naming flags in the SELECTs used to
 	 * indicate which disjoint produced a tuple.
 	 */
@@ -859,53 +920,67 @@
 	val subselect = sql.Subselect(sql.Union(subselects))
 	R2RState(state.joins + sql.InnerJoin(sql.AliasedResource(subselect,unionAlias), None), state2.varmap, state2.exprs)
       }
-      case sparql.OptionalGraphPattern(gp) => {
-	/* state_postLeadingTable: create an initial table if the first conjoint is optional.
-	 * e.g. ... FROM (SELECT 1 AS _EMPTY_) AS _EMPTY_ LEFT OUTER JOIN ...
-	 */
-	val state_postLeadingTable =
-	  if (state.joins.size == 0)
-	    R2RState(state.joins + sql.InnerJoin(sql.AliasedResource(sql.Subselect(
-	      sql.Select(
-		sql.AttributeList(Set(sql.NamedAttribute(sql.PrimaryExpressionTyped(sql.Datatype.INTEGER,sql.Name("1")),
-							 sql.AttrAlias(sql.Name("_EMPTY_"))))),
-		sql.TableList(util.AddOrderedSet()),
-		None
-	      )), sql.RelVar(sql.Name("_EMPTY_"))), None), state.varmap, state.exprs)
-	  else
-	    state
-	synthesizeOuterJoin(state_postLeadingTable, gp, false, db, enforceForeignKeys)
+
+      /** <a href="http://www.w3.org/TR/rdf-sparql-query/#defn_evalGraph">Graph</a>(IRI, P)
+       * I don't know what the parser did with the IRI, but we don't know what to do with GRAPHs anyways. 
+       * @param gp2  nested graph pattern (Ω in algebra)
+       */
+      case sparql.GraphGraphPattern(gp2) => error("no code to handle GraphGraphPatterns (" + gp2 + ")")
+
+      /** Minus is from SPARQL 1.1 (in progress). This doesn't need documentation now.
+       * @param gp2  the graph pattern to subtract.
+       */
+      case sparql.MinusGraphPattern(gp2) => {
+	if (state.joins.size == 0) state
+	else synthesizeOuterJoin(state, gp2, true, db, enforceForeignKeys)
       }
-      case sparql.MinusGraphPattern(gp) => {
-	if (state.joins.size == 0) state
-	else synthesizeOuterJoin(state, gp, true, db, enforceForeignKeys)
-      }
-      case sparql.GraphGraphPattern(gp) => error("no code to handle GraphGraphPatterns (" + gp + ")")
     }
   }
 
+  /**
+   * Default interface for SparqlToSql.
+   * @param db  database description.
+   * @param sparquery  SPARQL compile tree.
+   * @param stem  stem URI for all generated RDF URIs.
+   * @param enforceForeignKeys  if true, SPARQL triple patterns corresponding to foreign keys, e.g. ?who :hasParent ?parent , generate a join on the referenced table.
+   * @param concat  if true, keys will produce SQL functions to generate a URI, e.g. SELECT CONCAT(stemURI, table, "/", pk, ".", R_who.pk) AS who
+   * @return an SQL query corresponding to sparquery
+   */
   def apply (db:sql.DatabaseDesc, sparquery:sparql.Select, stem:StemURI, enforceForeignKeys:Boolean, concat:Boolean) : sql.Select = {
     val sparql.Select(attrs, triples) = sparquery
 
-    /* Create an object to hold our compilation state. */
+    /** Create an object to hold our compilation state. */
     val initState = R2RState(
       util.AddOrderedSet[sql.Join](), 
       Map[sparql.Assignable, SQL2RDFValueMapper](), 
       Set[sql.Expression]()
     )
 
+    /**
+     * Generate a new state with the joins, mappings to sql expressions, and
+     * constraints implicit in the SPARQL WHERE pattern.
+     */
     val r2rState = mapGraphPattern(db, initState, sparquery.gp, enforceForeignKeys)
 
-    /* Select the attributes corresponding to the variables
-     * in the SPARQL SELECT.  */
-    val attrlist:Set[sql.NamedAttribute] = attrs.attributelist.foldLeft(Set[sql.NamedAttribute]())((attrs, v) => 
-      attrs + sql.NamedAttribute({
-	if (concat) varToConcat(r2rState.varmap, sparql.VarAssignable(v), stem)
-	else varToAttribute(r2rState.varmap, sparql.VarAssignable(v))
-      } , sql.AttrAlias(attrAliasNameFromVar(sparql.VarAssignable(v)))
-      ))
+    /**
+     * Select the attributes corresponding to the variables
+     * in the SPARQL SELECT.
+     */
+    val attrlist:Set[sql.NamedAttribute] =
+      // This foldLeft could be a map, if i could coerce to a set afterwards.
+      attrs.attributelist.foldLeft(Set[sql.NamedAttribute]())((attrs, v) => {
+	val exp =
+	  if (concat)
+	    // generate CONCAT expression for keys.
+	    varToExpr(r2rState.varmap, sparql.VarAssignable(v), stem)
+	  else
+	    varToAttribute(r2rState.varmap, sparql.VarAssignable(v))
+	/** Projection alias. */
+	val as = sql.AttrAlias(attrAliasNameFromVar(sparql.VarAssignable(v)))
+	attrs + sql.NamedAttribute(exp , as)
+      })
 
-    /* Construct the generated query as an abstract syntax. */
+    /** Construct the generated query as an abstract syntax. */
     val select = sql.Select(
       sql.AttributeList(attrlist),
       sql.TableList(r2rState.joins),
@@ -917,7 +992,7 @@
     )
     // println("r2rState.varmap: " + r2rState.varmap)
     // println("select.expression: " + select.expression)
-    select.makePretty()
+    select.makePretty() // eliminate foo.bar=foo.bar and stuff like that.
   }
 }