changeset 47:f0491f915432

+ jtidy, but it seems there is a bug in jtidy
author Alexandre Bertails <bertails@w3.org>
date Wed, 28 Sep 2011 13:36:43 -0400
parents c7b8bbeb877a
children 0991de4541f4
files src/main/scala/Main.scala
diffstat 1 files changed, 46 insertions(+), 2 deletions(-) [+]
line wrap: on
line diff
--- a/src/main/scala/Main.scala	Wed Sep 28 10:30:22 2011 -0400
+++ b/src/main/scala/Main.scala	Wed Sep 28 13:36:43 2011 -0400
@@ -8,9 +8,12 @@
 
 import net.sf.saxon.s9api._
 import javax.xml.transform.stream.StreamSource
+// import java.io.{InputStream, OutputStream, ByteArrayInputStream, ByteArrayOutputStream}
 import java.io.{Console => _, _}
 import java.net._
 
+import org.w3c.tidy.Tidy
+
 /*
  * - tests
  * - corriger url de base
@@ -120,7 +123,7 @@
 
         val paramsW = defaultParams ++ paramsNoURI + ( "doc_uri" -> spec )
 
-        pubrulesWrapper using paramsW applyOn (Source(spec)) getResponder()
+        pubrulesWrapper using paramsW applyOn (Source.fromTidy(spec)) getResponder()
 
       }
     }
@@ -195,7 +198,7 @@
 
   def apply(url:URL):XSL = XSL(Source(url))
 
-  def apply(url:String):XSL = XSL(Source(url))
+  /**
+   * Builds an XSL from the textual form of a URL.
+   *
+   * The string is parsed with `new URL(...)` and the result is handed to
+   * `XSL(...)`.
+   *
+   * @param url location of the stylesheet, as a string accepted by java.net.URL
+   */
+  def apply(url:String):XSL = {
+    val location = new URL(url)
+    XSL(location)
+  }
   
 }
 
@@ -208,5 +211,46 @@
     streamSource.setSystemId(url.toString)
     Source(streamSource)
     }
+
   def apply(url:String):Source = Source(new URL(url))
+
+  import dispatch._
+
+  /**
+   * Runs JTidy over `in` and returns the cleaned-up document as XHTML.
+   *
+   * The result is buffered in memory instead of being written to a fixed
+   * scratch file, so concurrent calls cannot overwrite each other's output
+   * and no file handle is left open.
+   *
+   * @param in      the markup to clean up
+   * @param charset charset name supplied by the caller; currently NOT applied,
+   *                the input is always read as "utf8" (see note in the body)
+   * @return a stream over the bytes JTidy produced, written with the "utf8"
+   *         output encoding
+   */
+  def tidy(in:InputStream, charset:String):InputStream = {
+    val tidy = new Tidy
+    // tidy_options = ["-n", "-asxml", "-q", "--force-output","yes", "--show-warnings", "no"]
+    tidy.setXHTML(true)
+    // NOTE(review): `charset` is ignored and the input encoding is forced to
+    // "utf8". The original author had disabled
+    //   tidy.setInputEncoding(charset.toUpperCase)
+    // presumably because of the suspected JTidy bug -- TODO confirm before
+    // re-enabling it.
+    tidy.setInputEncoding("utf8")
+    tidy.setOutputEncoding("utf8")
+    //tidy.setNumEntities(true)
+    val out = new ByteArrayOutputStream
+    tidy.parse(in, out)
+    new ByteArrayInputStream(out.toByteArray)
+  }
+
+
+  /**
+   * Debugging helper: fetches `url`, runs it through JTidy with explicit
+   * encodings and writes the result to /tmp/out.xml.
+   *
+   * Both the network stream and the output file are closed before returning,
+   * including when configuring JTidy, opening the file or parsing throws.
+   *
+   * @param url     location of the document to fetch, as accepted by java.net.URL
+   * @param charin  encoding name passed to JTidy's setInputEncoding
+   * @param charout encoding name passed to JTidy's setOutputEncoding
+   */
+  def tidytest(url:String, charin:String, charout:String):Unit = {
+    val tidy = new Tidy
+    val in = new URL(url).openStream()
+    try {
+      tidy.setXHTML(true)
+      tidy.setInputEncoding(charin)
+      tidy.setOutputEncoding(charout)
+      //tidy.setNumEntities(true)
+      val out = new FileOutputStream("/tmp/out.xml")
+      try {
+        tidy.parse(in, out)
+      } finally {
+        out.close()
+      }
+    } finally {
+      in.close()
+    }
+  }
+
+  /**
+   * Builds a Source from the document at `url` after passing it through
+   * `tidy`.
+   *
+   * The document is requested with dispatch's `Http`, the response is handed
+   * to `tidy`, and the resulting stream is wrapped in a StreamSource whose
+   * system id is the string form of `url`.
+   *
+   * @param url location of the document to fetch and tidy
+   */
+  def fromTidy(url:URL):Source = {
+    val location = url.toString
+    val tidied:InputStream = Http(new Request(location) >> (tidy _))
+    val wrapped = new StreamSource(tidied)
+    wrapped.setSystemId(location)
+    Source(wrapped)
+  }
+
+  /**
+   * String overload of `fromTidy`: parses `url` with `new URL(...)` and
+   * delegates to the URL-based variant.
+   *
+   * @param url location of the document, as a string accepted by java.net.URL
+   */
+  def fromTidy(url:String):Source = {
+    val location = new URL(url)
+    Source.fromTidy(location)
+  }
+
 }
Set up and maintained by the W3C Systems Team; please report bugs to sysreq@w3.org.

W3C would like to thank Microsoft, which donated the server that allows us to run this service.