changeset 16:cc6c7c639fe8

~ the XdmNode constructor is now mixable
author Alexandre Bertails <bertails@w3.org>
date Wed, 03 Mar 2010 17:31:24 -0500
parents b8d7acb0f7b2
children ba437c299c6a
files src/main/scala/Main.scala src/main/scala/Tidy.scala src/main/scala/XML.scala src/test/scala/XMLTest.scala
diffstat 4 files changed, 32 insertions(+), 21 deletions(-) [+]
line wrap: on
line diff
--- a/src/main/scala/Main.scala	Wed Mar 03 11:34:58 2010 -0500
+++ b/src/main/scala/Main.scala	Wed Mar 03 17:31:24 2010 -0500
@@ -11,7 +11,7 @@
   /**
    * the inner parsed XML document from the url
    */
-  val doc:xml.Node = xml.Node(url)
+  val doc:xml.Node = xml.Node.htmlNode(url)
 
   final val divHead = "//html:div[@class='head']"
 
--- a/src/main/scala/Tidy.scala	Wed Mar 03 11:34:58 2010 -0500
+++ b/src/main/scala/Tidy.scala	Wed Mar 03 17:31:24 2010 -0500
@@ -1,8 +1,9 @@
 package org.w3.util
 
-import java.io.{InputStream, OutputStream}
+import org.w3.util.xml._
+import java.io.{InputStream, OutputStream, ByteArrayInputStream, ByteArrayOutputStream}
 
-object Tidy {
+trait Tidy extends NodeInjector {
 
   /**
    * utility method that invokes the jtidy library
@@ -10,12 +11,15 @@
    * encoding is the expected encoding for in
    * see http://jtidy.sourceforge.net/howto.html
    */
-  def tidy(in:InputStream, out:OutputStream, encoding:String):Unit = {
+  override def getBodyAndCharset(url:String):(InputStream, String) = {
+    val (in:InputStream, encoding:String) = super.getBodyAndCharset(url)
     val tidy = new org.w3c.tidy.Tidy()
     tidy.setXHTML(true)
     tidy.setInputEncoding(encoding)
-    tidy.setOutputEncoding("UTF-8");
+    tidy.setOutputEncoding("UTF-8")
+    val out = new ByteArrayOutputStream()
     tidy.parse(in, out)
+    (new ByteArrayInputStream(out.toByteArray), encoding)
   }
 
 }
--- a/src/main/scala/XML.scala	Wed Mar 03 11:34:58 2010 -0500
+++ b/src/main/scala/XML.scala	Wed Mar 03 17:31:24 2010 -0500
@@ -10,7 +10,7 @@
 import scala.xml.Elem
 import java.net._
 
-object Node {
+class NodeInjector() {
 
   ResponseCache.setDefault(CatalogLike)
 
@@ -27,7 +27,7 @@
 
       (new FileInputStream(url substring 7), DEFAULTCHARSET)
 
-    } else { // we assume it's a regulare URL that dispatch can handle, like http://
+    } else { // we assume it's a regular URL that dispatch can handle, like http://
 
       val body = new ByteArrayOutputStream()
 
@@ -59,12 +59,11 @@
 
     // we store the full bodies in bytearrays to be able to read from them
 
-    val tidyBody = new ByteArrayOutputStream()
     val (body, charset) = getBodyAndCharset(url)
 
     // call the jtidy library to generate a valid xhtml document
     // the charset corresponds to the one from the http response
-    Tidy.tidy(body, tidyBody, charset)
+//    val (tidyBody, _) = Tidy.tidy(body, charset)
 
     // val f = new FileWriter("/tmp/out.xml")
     // f.write(tidyBody.toString)
@@ -78,7 +77,7 @@
     builder.setWhitespaceStrippingPolicy(WhitespaceStrippingPolicy.ALL)
     //builder.setDTDValidation(false)
 
-    val doc:XdmNode = builder.build(new StreamSource(new ByteArrayInputStream(tidyBody.toByteArray)))
+    val doc:XdmNode = builder.build(new StreamSource(body))
 
     // "closing a ByteArrayOutputStream has no effect", so we don't do it
 
@@ -86,20 +85,26 @@
     doc
   }
 
+}
+
+object Node {
+
   /**
    * tranform a Saxon XdmItem into a Scala Elem
    */
   def XdmNode2Elem(node:XdmNode):Elem = scala.xml.XML.load(new StringReader(node.toString)) 
 
-  def apply(url:String):Node = Node(convertUrlToXdmNode(url))
+  val TRD = "http://www.w3.org/2001/10/trdoc-data.xsl"
+  val HTML = Map("html" -> "http://www.w3.org/1999/xhtml",
+		 "trd" -> TRD)
 
-  implicit def XdmNode2Node(node:XdmNode) = Node(node)
+  val tidy = new NodeInjector with Tidy
+
+  def htmlNode(uri:String):Node = new Node(tidy.convertUrlToXdmNode(uri), HTML)
 
 }
 
-case class Node(node:XdmNode) {
-
-  val NAMESPACE = "http://www.w3.org/2001/10/trdoc-data.xsl"
+case class Node(node:XdmNode, namespaces:Map[String, String]) {
 
   /**
    * prepare an XPath selector ready to be used against this node
@@ -108,16 +113,18 @@
   def getXPathSelector(xpath:String, variables:List[(String, Node)]):XPathSelector = {
     val proc:Processor = node.getProcessor
     val xpathCompiler:XPathCompiler = proc.newXPathCompiler();
+    for((prefix, uri) <- namespaces)
+      xpathCompiler.declareNamespace(prefix, uri)
     xpathCompiler.declareNamespace("html", "http://www.w3.org/1999/xhtml")
-    xpathCompiler.declareNamespace("trd", NAMESPACE)
+    xpathCompiler.declareNamespace("trd", Node.TRD)
     for((v, _) <- variables)
-      xpathCompiler.declareVariable(new QName(NAMESPACE, v),
+      xpathCompiler.declareVariable(new QName(Node.TRD, v),
 				    ItemType.ANY_NODE,
 				    OccurrenceIndicator.ONE_OR_MORE)
     val selector:XPathSelector = xpathCompiler.compile(xpath).load()
     selector.setContextItem(node)
     for((v, n) <- variables)
-      selector.setVariable(new QName(NAMESPACE, v), n.node)
+      selector.setVariable(new QName(Node.TRD, v), n.node)
     selector
   }
 
@@ -132,7 +139,7 @@
    */
   def evaluateSingle(xpath:String, variables:(String, Node)*):Option[Node] = {
     val selector = getXPathSelector(xpath, variables.toList)
-    Option(selector.evaluateSingle()) flatMap ((item:XdmItem) => Some(Node(item.asInstanceOf[XdmNode])))
+    Option(selector.evaluateSingle()) flatMap ((item:XdmItem) => Some(Node(item.asInstanceOf[XdmNode], namespaces)))
   }
 
   /**
@@ -141,7 +148,7 @@
    */
   def evaluate(xpath:String, variables:(String, Node)*):Iterator[Node] = {
     val selector = getXPathSelector(xpath, variables.toList)
-    JIteratorWrapper(selector.iterator()) map ((item:XdmItem) => Node(item.asInstanceOf[XdmNode]))
+    JIteratorWrapper(selector.iterator()) map ((item:XdmItem) => Node(item.asInstanceOf[XdmNode], namespaces))
   }
 
   private def evaluateAsXdmAtomicValues(xpath:String, variables:List[(String, Node)]):Iterator[XdmAtomicValue] = {
--- a/src/test/scala/XMLTest.scala	Wed Mar 03 11:34:58 2010 -0500
+++ b/src/test/scala/XMLTest.scala	Wed Mar 03 17:31:24 2010 -0500
@@ -6,7 +6,7 @@
 
   test("xpath requests") {
 
-    val document = Node("file://src/test/resources/WD-xmldsig-properties-20100204.html")
+    val document = Node.htmlNode("file://src/test/resources/WD-xmldsig-properties-20100204.html")
 
     assert(document.evaluateSingle("//html:h2") isDefined)
 
Set up and maintained by the W3C Systems Team. Please report bugs to sysreq@w3.org.

W3C would like to thank Microsoft, which donated the server that allows us to run this service.