The server did not cope well with initial failure followed by subsequent success. This is an initial fix for that. The procedure is for the verification to first check data in cached graphs and if that fails to check from fresh web information. Renamed WebCache to GraphCache. WebCache would be another service that would know how to cache representations. Updated to latest version of guava, the google collections library. [branch: webid]
author: Henry Story <henry.story@bblfish.net>
Thu, 05 Jan 2012 23:55:46 +0100
branch: webid
changeset 167 a38d404047ce
parent 166 fc3c5c54f72b
child 168 5be56759c8cf
The server did not cope well with an initial failure followed by a subsequent success. This is an initial fix for that. Verification now first checks the data in cached graphs and, if that fails, checks again against freshly fetched web information. Renamed WebCache to GraphCache. WebCache would be another service that would know how to cache representations. Updated to the latest version of Guava, the Google collections library.
project/build.scala
src/main/scala/Filesystem.scala
src/main/scala/GraphCache.scala
src/main/scala/Resource.scala
src/main/scala/WebCache.scala
src/main/scala/auth/Authz.scala
src/main/scala/auth/Principals.scala
src/main/scala/auth/WebIDSrvc.scala
src/main/scala/auth/WebIdClaim.scala
src/main/scala/auth/X509Claim.scala
src/main/scala/sommer/ResourceReader.scala
src/test/scala/auth/secure_specs.scala
--- a/project/build.scala	Thu Jan 05 00:48:40 2012 +0100
+++ b/project/build.scala	Thu Jan 05 23:55:46 2012 +0100
@@ -33,7 +33,7 @@
   val grizzled = "org.clapper" %% "grizzled-scala" % "1.0.8" % "test"
   val scalaz = "org.scalaz" %% "scalaz-core" % "6.0.3"
   val argot =  "org.clapper" %% "argot" % "0.3.5"
-  val guava =  "com.google.guava" % "guava" % "10.0.1"
+  val guava =  "com.google.guava" % "guava" % "11.0"
 //  val restlet = "org.restlet.dev" % "org.restlet" % "2.1-SNAPSHOT"
 //  val restlet_ssl = "org.restlet.dev" % "org.restlet.ext.ssl" % "2.1-SNAPSHOT"
   val jsslutils = "org.jsslutils" % "jsslutils" % "1.0.5"
--- a/src/main/scala/Filesystem.scala	Thu Jan 05 00:48:40 2012 +0100
+++ b/src/main/scala/Filesystem.scala	Thu Jan 05 23:55:46 2012 +0100
@@ -45,7 +45,7 @@
       logger.debug("%s successfully created: %s" format (fileOnDisk.getAbsolutePath, r.toString))
     }
     
-    def get(): Validation[Throwable, Model] = {
+    def get(unused: CacheControl.Value = CacheControl.CacheFirst): Validation[Throwable, Model] = {
       val model = ModelFactory.createDefaultModel()
       val guessLang = fileOnDisk.getName match {
         case Authoritative.r(_,suffix) => Representation.fromSuffix(suffix) match {
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/main/scala/GraphCache.scala	Thu Jan 05 23:55:46 2012 +0100
@@ -0,0 +1,127 @@
+ /*
+ * Copyright (c) 2011 Henry Story (bblfish.net)
+ * under the MIT licence defined
+ *    http://www.opensource.org/licenses/mit-license.html
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in the
+ * Software without restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to the
+ * following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
+ * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+ * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */ 
+
+package org.w3.readwriteweb
+
+import com.hp.hpl.jena.rdf.model.Model
+import org.apache.http.MethodNotSupportedException
+import org.w3.readwriteweb.util._
+import java.net.{ConnectException, URL}
+import scalaz.{Scalaz, Validation}
+import java.util.concurrent.TimeUnit
+import com.google.common.cache.{LoadingCache, CacheLoader, CacheBuilder, Cache}
+
+
+
+/**
+ * Fetch resources on the Web and cache them
+ * ( at a later point this would include saving them to an indexed quad store )
+ *
+ * @author Henry Story
+ * @created: 12/10/2011
+ *
+ */
+object GraphCache extends ResourceManager  {
+  import dispatch._
+  import Scalaz._
+
+  //this is a simple but quite stupid web cache so that graphs can stay in memory and be used a little
+  // bit across sessions
+  val cache: LoadingCache[URL,Validation[Throwable,Model]] =
+       CacheBuilder.newBuilder()
+         .expireAfterAccess(5, TimeUnit.MINUTES)
+         .softValues()
+//         .expireAfterWrite(30, TimeUnit.MINUTES)
+       .build(new CacheLoader[URL, Validation[Throwable,Model]] {
+         def load(url: URL) = getUrl(url)
+       })
+
+  val http = new Http with thread.Safety
+  
+  def basePath = null //should be cache dir?
+
+  def sanityCheck() = true  //cache dire exists? But is this needed for functioning?
+
+  def resource(u : URL) = new org.w3.readwriteweb.Resource {
+    import CacheControl._
+    def name() = u
+    def get(cacheControl: CacheControl.Value = CacheControl.CacheOnly) = cacheControl match {
+      case CacheOnly => {
+        val res = cache.getIfPresent(u)
+        if (null==res) NoCachEntry.fail
+        else res
+      }
+      case CacheFirst => cache.get(u)
+      case NoCache => {
+        val res = getUrl(u)
+        cache.put(u,res) //todo: should this only be done if say the returned value is not an error?
+        res
+      }
+    }
+    // when fetching information from the web creating directories does not make sense
+    //perhaps the resource manager should be split into read/write sections?
+    def save(model: Model) =  throw new MethodNotSupportedException("not implemented")
+
+    def createDirectory(model: Model) =  throw new MethodNotSupportedException("not implemented")
+  }
+
+  private def getUrl(u: URL) = {
+
+      // note we prefer rdf/xml and turtle over html, as html does not always contain rdfa, and we prefer those over n3,
+      // as we don't have a full n3 parser. Better would be to have a list of available parsers for whatever rdf framework is
+      // installed (some claim to do n3 when they only really do turtle)
+      // we can't currently accept */* as we don't have GRDDL implemented
+      val request = url(u.toString) <:< Map("Accept"->
+        "application/rdf+xml,text/turtle,application/xhtml+xml;q=0.8,text/html;q=0.7,text/n3;q=0.6")
+
+      //we need to tell the model about the content type
+      val handler: Handler[Validation[Throwable, Model]] = request.>+>[Validation[Throwable, Model]](res =>  {
+        res >:> { headers =>
+          val encoding = headers("Content-Type").headOption match {
+            case Some(mime) => {
+              Lang(mime.split(";")(0)) getOrElse Lang.default
+            }
+            case None => RDFXML  //todo: it would be better to try to do a bit of guessing in this case by looking at content
+          }
+          val loc = headers("Content-Location").headOption match {
+            case Some(loc) =>  new URL(u,loc)
+            case None => new URL(u.getProtocol,u.getAuthority,u.getPort,u.getPath)
+          }
+          res>>{ in=> modelFromInputStream(in,loc,encoding) }
+
+        }
+      })
+      try {
+        val future = http(handler)
+        future
+      } catch {
+        case e: ConnectException => e.fail
+      }
+
+    }
+
+
+   override def finalize() { http.shutdown() }
+}
+
+ object NoCachEntry extends Exception
\ No newline at end of file
--- a/src/main/scala/Resource.scala	Thu Jan 05 00:48:40 2012 +0100
+++ b/src/main/scala/Resource.scala	Thu Jan 05 23:55:46 2012 +0100
@@ -13,9 +13,13 @@
   def resource(url:URL):Resource
 }
 
+object CacheControl extends Enumeration {
+  val CacheOnly, CacheFirst, NoCache = Value
+}
+
 trait Resource {
   def name: URL
-  def get():Validation[Throwable, Model]
+  def get(policy: CacheControl.Value = CacheControl.CacheFirst): Validation[Throwable, Model]
   def save(model:Model):Validation[Throwable, Unit]
   def createDirectory(model: Model): Validation[Throwable, Unit]
 }
--- a/src/main/scala/WebCache.scala	Thu Jan 05 00:48:40 2012 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,110 +0,0 @@
- /*
- * Copyright (c) 2011 Henry Story (bblfish.net)
- * under the MIT licence defined
- *    http://www.opensource.org/licenses/mit-license.html
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy of
- * this software and associated documentation files (the "Software"), to deal in the
- * Software without restriction, including without limitation the rights to use, copy,
- * modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
- * and to permit persons to whom the Software is furnished to do so, subject to the
- * following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
- * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
- * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
- * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- */ 
-
-package org.w3.readwriteweb
-
-import com.hp.hpl.jena.rdf.model.Model
-import org.apache.http.MethodNotSupportedException
-import org.w3.readwriteweb.util._
-import java.net.{ConnectException, URL}
-import scalaz.{Scalaz, Validation}
-import java.util.concurrent.TimeUnit
-import com.google.common.cache.{CacheLoader, CacheBuilder, Cache}
-
-
-/**
- * @author Henry Story
- * @created: 12/10/2011
- *
- * The WebCache currently does not cache
- */
-object WebCache extends ResourceManager  {
-  import dispatch._
-  import Scalaz._
-
-  //this is a simple but quite stupid web cache so that graphs can stay in memory and be used a little
-  // bit across sessions
-  val cache: Cache[URL,Validation[Throwable,Model]] =
-       CacheBuilder.newBuilder()
-         .expireAfterAccess(5, TimeUnit.MINUTES)
-//         .softValues()
-//         .expireAfterWrite(30, TimeUnit.MINUTES)
-       .build(new CacheLoader[URL, Validation[Throwable,Model]] {
-         def load(url: URL) = getUrl(url)
-       })
-
-  val http = new Http with thread.Safety
-  
-  def basePath = null //should be cache dir?
-
-  def sanityCheck() = true  //cache dire exists? But is this needed for functioning?
-
-  def resource(u : URL) = new org.w3.readwriteweb.Resource {
-    def name() = u
-    def get() = cache.get(u)
-
-    // when fetching information from the web creating directories does not make sense
-    //perhaps the resource manager should be split into read/write sections?
-    def save(model: Model) =  throw new MethodNotSupportedException("not implemented")
-
-    def createDirectory(model: Model) =  throw new MethodNotSupportedException("not implemented")
-  }
-
-  private def getUrl(u: URL) = {
-
-      // note we prefer rdf/xml and turtle over html, as html does not always contain rdfa, and we prefer those over n3,
-      // as we don't have a full n3 parser. Better would be to have a list of available parsers for whatever rdf framework is
-      // installed (some claim to do n3 when they only really do turtle)
-      // we can't currently accept */* as we don't have GRDDL implemented
-      val request = url(u.toString) <:< Map("Accept"->
-        "application/rdf+xml,text/turtle,application/xhtml+xml;q=0.8,text/html;q=0.7,text/n3;q=0.6")
-
-      //we need to tell the model about the content type
-      val handler: Handler[Validation[Throwable, Model]] = request.>+>[Validation[Throwable, Model]](res =>  {
-        res >:> { headers =>
-          val encoding = headers("Content-Type").headOption match {
-            case Some(mime) => {
-              Lang(mime.split(";")(0)) getOrElse Lang.default
-            }
-            case None => RDFXML  //todo: it would be better to try to do a bit of guessing in this case by looking at content
-          }
-          val loc = headers("Content-Location").headOption match {
-            case Some(loc) =>  new URL(u,loc)
-            case None => new URL(u.getProtocol,u.getAuthority,u.getPort,u.getPath)
-          }
-          res>>{ in=> modelFromInputStream(in,loc,encoding) }
-
-        }
-      })
-      try {
-        val future = http(handler)
-        future
-      } catch {
-        case e: ConnectException => e.fail
-      }
-
-    }
-
-
-   override def finalize() { http.shutdown() }
-}
--- a/src/main/scala/auth/Authz.scala	Thu Jan 05 00:48:40 2012 +0100
+++ b/src/main/scala/auth/Authz.scala	Thu Jan 05 23:55:46 2012 +0100
@@ -30,7 +30,7 @@
 import com.hp.hpl.jena.query.{QueryExecutionFactory, QuerySolutionMap, QueryFactory}
 import unfiltered.response.{ResponseFunction, Unauthorized}
 import com.hp.hpl.jena.rdf.model.ResourceFactory
-import org.w3.readwriteweb.{Authoritative, Resource, ResourceManager, WebCache}
+import org.w3.readwriteweb.{Authoritative, Resource, ResourceManager, GraphCache}
 import org.w3.readwriteweb.util.HttpMethod
 
 /**
--- a/src/main/scala/auth/Principals.scala	Thu Jan 05 00:48:40 2012 +0100
+++ b/src/main/scala/auth/Principals.scala	Thu Jan 05 23:55:46 2012 +0100
@@ -24,12 +24,12 @@
 package org.w3.readwriteweb.auth
 
 import java.security.Principal
-import org.w3.readwriteweb.WebCache
 import com.hp.hpl.jena.rdf.model.Model
 import com.hp.hpl.jena.shared.WrappedIOException
 import scalaz.{Scalaz, Validation}
 import Scalaz._
 import java.net.{ConnectException, URL}
+import org.w3.readwriteweb.{CacheControl, GraphCache}
 
 /**
  * @author Henry Story from http://bblfish.net/
@@ -75,10 +75,10 @@
     case _ => false
   }
 
-  //TODO: now that we are no longer passing the WebCache around it's questionable whether we still need this method
+  //TODO: now that we are no longer passing the GraphCache around it's questionable whether we still need this method
   //in this class
-  def getDefiningModel: Validation[ProfileError, Model] =
-    WebCache.resource(url).get() failMap {
+  def getDefiningModel(cacheControl: CacheControl.Value = CacheControl.CacheFirst): Validation[ProfileError, Model] =
+    GraphCache.resource(url).get(cacheControl) failMap {
       case ioe: WrappedIOException => new ProfileGetError("error fetching profile", Some(ioe),url)
       case connE : ConnectException => new ProfileGetError("error fetching profile", Some(connE),url)
       case other => new ProfileParseError("error parsing profile", Some(other),url)
--- a/src/main/scala/auth/WebIDSrvc.scala	Thu Jan 05 00:48:40 2012 +0100
+++ b/src/main/scala/auth/WebIDSrvc.scala	Thu Jan 05 23:55:46 2012 +0100
@@ -166,7 +166,7 @@
           claim match {
             case NoClaim => <span/>
             case _ => new Transform(node) {
-              val union = claim.verified.flatMap(_.getDefiningModel.toOption).fold(ModelFactory.createDefaultModel()) {
+              val union = claim.verified.flatMap(_.getDefiningModel().toOption).fold(ModelFactory.createDefaultModel()) {
                 (m1, m2) => m1.add(m2)
               }
               //this works because we have verified before
--- a/src/main/scala/auth/WebIdClaim.scala	Thu Jan 05 00:48:40 2012 +0100
+++ b/src/main/scala/auth/WebIdClaim.scala	Thu Jan 05 23:55:46 2012 +0100
@@ -32,6 +32,8 @@
 import com.hp.hpl.jena.query._
 import java.math.BigInteger
 import com.hp.hpl.jena.datatypes.xsd.XSDDatatype
+import org.w3.readwriteweb.CacheControl
+import scalaz.{Failure, Scalaz, Validation}
 
 
 /**
@@ -95,9 +97,15 @@
       }
   }
 
-  lazy val verify: Validation[WebIDClaimFailure, WebID] = key match {
+  def verify: Validation[WebIDClaimFailure, WebID] = key match {
       case rsakey: RSAPublicKey =>
-        WebID(san).flatMap(webid=> webid.getDefiningModel.flatMap(rsaTest(webid, rsakey)) )
+        WebID(san).flatMap(webid=> {
+          webid.getDefiningModel(CacheControl.CacheOnly).flatMap(rsaTest(webid, rsakey)) match {
+            case Failure(_) => webid.getDefiningModel(CacheControl.NoCache).flatMap(rsaTest(webid, rsakey))
+            case o => o
+          }
+        }
+        )
       case _ => new UnsupportedKeyType("We only support RSA keys at present", key).fail
   }
 }
--- a/src/main/scala/auth/X509Claim.scala	Thu Jan 05 00:48:40 2012 +0100
+++ b/src/main/scala/auth/X509Claim.scala	Thu Jan 05 23:55:46 2012 +0100
@@ -47,7 +47,7 @@
   implicit val fetch = true //fetch the certificate if we don't have it
 
 // this is cool because it is not in danger of running out of memory but it makes it impossible to create the claim
-// with an implicit  WebCache...
+// with an implicit  GraphCache...
   val idCache: Cache[X509Certificate, X509Claim] =
      CacheBuilder.newBuilder()
      .expireAfterWrite(30, TimeUnit.MINUTES)
@@ -136,7 +136,7 @@
     new WebIDClaim(webid, cert.getPublicKey.asInstanceOf[RSAPublicKey]) 
   }
 
-  lazy val verified: List[WebID] = claims.flatMap(_.verify.toOption)
+  def verified: List[WebID] = claims.flatMap(_.verify.toOption)
 
   //note could also implement Destroyable
   //
--- a/src/main/scala/sommer/ResourceReader.scala	Thu Jan 05 00:48:40 2012 +0100
+++ b/src/main/scala/sommer/ResourceReader.scala	Thu Jan 05 23:55:46 2012 +0100
@@ -26,7 +26,7 @@
 import com.hp.hpl.jena.vocabulary.RDF
 import com.hp.hpl.jena.sparql.vocabulary.FOAF
 import java.lang.String
-import org.w3.readwriteweb.{Resource, WebCache}
+import org.w3.readwriteweb.{Resource, GraphCache}
 import scalaz.Validation
 import java.net.URL
 import collection._
@@ -107,7 +107,7 @@
   type Val[A] = Validation[scala.Throwable,A]
  
   def findPeople(m: Resource): Validation[scala.Throwable,Set[Person]] = {
-     for (gr<-m.get) yield {
+     for (gr<-m.get()) yield {
        for (st <- gr.listStatements(null,RDF.`type`,FOAF.Person).asScala;
         val subj = st.getSubject;
         st2 <- gr.listStatements(subj, FOAF.name,null).asScala
@@ -137,13 +137,13 @@
   }
 
   def findDefinedPeople(m: Resource): Validation[scala.Throwable,Set[IdPerson]] = {
-    for (gr<-m.get) yield {
+    for (gr<-m.get()) yield {
       definedPeople(gr, m.name)
     }.toSet
   }
   
   def findIdPeople(m: Resource): Val[Set[IdPerson]] = {
-    for (gr<-m.get) yield {
+    for (gr<-m.get()) yield {
       for (st <- gr.listStatements(null,RDF.`type`,FOAF.Person).asScala;
            val subj = st.getSubject;
            if (subj.isURIResource)
@@ -162,7 +162,7 @@
 }
 
 object Test {
-  implicit def urlToResource(u: URL) = WebCache.resource(u)
+  implicit def urlToResource(u: URL) = GraphCache.resource(u)
   import System._
 
   val peopleRd = new ResourceReader[Set[Person]](Extractors.findPeople)
@@ -170,7 +170,7 @@
   val idPeopleRd = new ResourceReader[Set[IdPerson]](Extractors.findIdPeople)
   val definedPeopleFriends = definedPeopleRd.flatMap(people =>ResourceReader[Set[IdPerson]]{
     resource: Resource =>
-       resource.get.map(gr=>
+       resource.get().map(gr=>
          for ( p <- people;
                st <- gr.listStatements(p.id, FOAF.knows, null).asScala ;
               val friend = st.getObject;
--- a/src/test/scala/auth/secure_specs.scala	Thu Jan 05 00:48:40 2012 +0100
+++ b/src/test/scala/auth/secure_specs.scala	Thu Jan 05 23:55:46 2012 +0100
@@ -93,7 +93,7 @@
 
 
 
-  val webCache = WebCache
+  val webCache = GraphCache
   val serverSslContext = javax.net.ssl.SSLContext.getInstance("TLS");