Add java htmlparser sources that match the original 52-level state

https://hg.mozilla.org/projects/htmlparser/
Commit: abe62ab2a9b69ccb3b5d8a231ec1ae11154c571d
This commit is contained in:
Matt A. Tobin
2020-01-15 14:56:04 -05:00
parent 09314667a6
commit 6168dbe21f
248 changed files with 62553 additions and 0 deletions
@@ -0,0 +1,12 @@
<module>
<!-- GWT modules this module inherits from. -->
<inherits name="com.google.gwt.core.Core"/>
<inherits name="com.google.gwt.user.User"/>
<!-- Super-source root: sources under "translatable" are offered to the GWT compiler. -->
<super-source path="translatable"/>
<!-- Subpackages whose sources belong to this module. -->
<source path="annotation"/>
<source path="common"/>
<source path="impl"/>
<source path="gwt"/>
<!-- Fixes the user.agent property to gecko1_8 (presumably so only that permutation is built; confirm). -->
<set-property name="user.agent" value="gecko1_8"/>
<!-- Entry point class; its onModuleLoad() exports parseHtmlDocument to the host window. -->
<entry-point class="nu.validator.htmlparser.gwt.HtmlParserModule"/>
<!-- Uses the linker registered under the name "sso" (presumably GWT's single-script linker; confirm). -->
<add-linker name="sso"/>
</module>
@@ -0,0 +1,477 @@
/*
* Copyright (c) 2007 Henri Sivonen
* Copyright (c) 2008-2009 Mozilla Foundation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
package nu.validator.htmlparser.gwt;
import java.util.LinkedList;
import nu.validator.htmlparser.common.DocumentMode;
import nu.validator.htmlparser.impl.CoalescingTreeBuilder;
import nu.validator.htmlparser.impl.HtmlAttributes;
import org.xml.sax.SAXException;
import com.google.gwt.core.client.JavaScriptException;
import com.google.gwt.core.client.JavaScriptObject;
/**
 * Tree builder that materializes the parse result directly in a browser DOM
 * document by calling DOM methods through JSNI.
 *
 * <p>Script handling: when a <code>script</code> element is created, the tree
 * builder is handed a placeholder element (created in the namespace
 * <code>http://n.validator.nu/placeholder/</code>) instead of the real script
 * element. Content appended to the placeholder is mirrored into the real
 * script element. When the placeholder is popped, tokenization suspension is
 * requested and {@link #maybeRunScript()} swaps the real script element into
 * the placeholder's position in the document.
 */
class BrowserTreeBuilder extends CoalescingTreeBuilder<JavaScriptObject> {

    /** DOM <code>nodeType</code> value of element nodes. */
    private static final int ELEMENT_NODE = 1;

    /** DOM <code>nodeType</code> value of text nodes. */
    private static final int TEXT_NODE = 3;

    /** The document being built; cleared once it has been handed out. */
    private JavaScriptObject document;

    /** The real script element currently being parsed, or null. */
    private JavaScriptObject script;

    /** The stand-in the tree builder sees instead of {@link #script}, or null. */
    private JavaScriptObject placeholder;

    /** Set when the current placeholder has been popped and its script can be swapped in. */
    private boolean readyToRun;

    /** Script/placeholder pairs set aside while a later script is being processed. */
    private final LinkedList<ScriptHolder> scriptStack = new LinkedList<ScriptHolder>();

    /**
     * Immutable pair of a real script element and its placeholder.
     *
     * <p>Declared static because it never touches the enclosing instance.
     */
    private static final class ScriptHolder {
        private final JavaScriptObject script;
        private final JavaScriptObject placeholder;

        /**
         * @param script
         *            the real script element
         * @param placeholder
         *            the placeholder standing in for the script
         */
        public ScriptHolder(JavaScriptObject script,
                JavaScriptObject placeholder) {
            this.script = script;
            this.placeholder = placeholder;
        }

        /**
         * Returns the script.
         *
         * @return the script
         */
        public JavaScriptObject getScript() {
            return script;
        }

        /**
         * Returns the placeholder.
         *
         * @return the placeholder
         */
        public JavaScriptObject getPlaceholder() {
            return placeholder;
        }
    }

    /**
     * Creates a tree builder that builds into the given document and installs
     * a <code>createElementNS</code> shim on it if it lacks that method.
     *
     * @param document
     *            the DOM document to populate
     */
    protected BrowserTreeBuilder(JavaScriptObject document) {
        super();
        this.document = document;
        installExplorerCreateElementNS(document);
    }

    /**
     * Installs a <code>createElementNS</code> replacement on documents that
     * do not provide one. The replacement maps XHTML to plain
     * <code>createElement</code> and maps MathML and SVG to prefixed elements
     * after registering the corresponding plug-in object once per document.
     *
     * @param doc
     *            the document to patch
     * @return <code>true</code> if the shim was installed, <code>false</code>
     *         if the document already had <code>createElementNS</code>
     */
    private static native boolean installExplorerCreateElementNS(
            JavaScriptObject doc) /*-{
        if (doc.createElementNS) {
            return false;
        }
        doc.createElementNS = function (uri, local) {
            if ("http://www.w3.org/1999/xhtml" == uri) {
                return doc.createElement(local);
            } else if ("http://www.w3.org/1998/Math/MathML" == uri) {
                if (!doc.mathplayerinitialized) {
                    var obj = document.createElement("object");
                    obj.setAttribute("id", "mathplayer");
                    obj.setAttribute("classid", "clsid:32F66A20-7614-11D4-BD11-00104BD3F987");
                    document.getElementsByTagName("head")[0].appendChild(obj);
                    document.namespaces.add("m", "http://www.w3.org/1998/Math/MathML", "#mathplayer");
                    doc.mathplayerinitialized = true;
                }
                return doc.createElement("m:" + local);
            } else if ("http://www.w3.org/2000/svg" == uri) {
                if (!doc.renesisinitialized) {
                    var obj = document.createElement("object");
                    obj.setAttribute("id", "renesis");
                    obj.setAttribute("classid", "clsid:AC159093-1683-4BA2-9DCF-0C350141D7F2");
                    document.getElementsByTagName("head")[0].appendChild(obj);
                    document.namespaces.add("s", "http://www.w3.org/2000/svg", "#renesis");
                    doc.renesisinitialized = true;
                }
                return doc.createElement("s:" + local);
            } else {
                // throw
                // Nothing is thrown for other namespaces; the shim returns
                // undefined for them.
            }
        };
        // A JSNI method declared boolean has to return an actual boolean.
        return true;
    }-*/;

    private static native boolean hasAttributeNS(JavaScriptObject element,
            String uri, String localName) /*-{
        return element.hasAttributeNS(uri, localName);
    }-*/;

    private static native void setAttributeNS(JavaScriptObject element,
            String uri, String localName, String value) /*-{
        element.setAttributeNS(uri, localName, value);
    }-*/;

    /**
     * Sets every attribute of <code>attributes</code> on <code>element</code>,
     * overwriting attributes that are already present.
     *
     * @param element
     *            the element receiving the attributes
     * @param attributes
     *            the attributes to copy
     */
    private static void copyAttributes(JavaScriptObject element,
            HtmlAttributes attributes) {
        for (int i = 0; i < attributes.getLength(); i++) {
            setAttributeNS(element, attributes.getURINoBoundsCheck(i),
                    attributes.getLocalNameNoBoundsCheck(i),
                    attributes.getValueNoBoundsCheck(i));
        }
    }

    /**
     * Adds to <code>element</code> only those attributes it does not already
     * have; existing attributes keep their values.
     */
    @Override protected void addAttributesToElement(JavaScriptObject element,
            HtmlAttributes attributes) throws SAXException {
        try {
            for (int i = 0; i < attributes.getLength(); i++) {
                String localName = attributes.getLocalNameNoBoundsCheck(i);
                String uri = attributes.getURINoBoundsCheck(i);
                if (!hasAttributeNS(element, uri, localName)) {
                    setAttributeNS(element, uri, localName,
                            attributes.getValueNoBoundsCheck(i));
                }
            }
        } catch (JavaScriptException e) {
            fatal(e);
        }
    }

    private static native void appendChild(JavaScriptObject parent,
            JavaScriptObject child) /*-{
        parent.appendChild(child);
    }-*/;

    private static native JavaScriptObject createTextNode(JavaScriptObject doc,
            String text) /*-{
        return doc.createTextNode(text);
    }-*/;

    private static native JavaScriptObject getLastChild(JavaScriptObject node) /*-{
        return node.lastChild;
    }-*/;

    private static native void extendTextNode(JavaScriptObject node, String text) /*-{
        node.data += text;
    }-*/;

    /**
     * Appends text to <code>parent</code>, extending a trailing text node if
     * there is one. Text for the current placeholder is additionally appended
     * to the real script element.
     */
    @Override protected void appendCharacters(JavaScriptObject parent,
            String text) throws SAXException {
        try {
            if (parent == placeholder) {
                // Mirror into the real script; the placeholder still gets the
                // text below.
                appendChild(script, createTextNode(document, text));
            }
            JavaScriptObject lastChild = getLastChild(parent);
            if (lastChild != null && getNodeType(lastChild) == TEXT_NODE) {
                extendTextNode(lastChild, text);
                return;
            }
            appendChild(parent, createTextNode(document, text));
        } catch (JavaScriptException e) {
            fatal(e);
        }
    }

    private static native boolean hasChildNodes(JavaScriptObject element) /*-{
        return element.hasChildNodes();
    }-*/;

    private static native JavaScriptObject getFirstChild(
            JavaScriptObject element) /*-{
        return element.firstChild;
    }-*/;

    /**
     * Moves all children of <code>oldParent</code>, in order, to the end of
     * <code>newParent</code>.
     */
    @Override protected void appendChildrenToNewParent(
            JavaScriptObject oldParent, JavaScriptObject newParent)
            throws SAXException {
        try {
            while (hasChildNodes(oldParent)) {
                appendChild(newParent, getFirstChild(oldParent));
            }
        } catch (JavaScriptException e) {
            fatal(e);
        }
    }

    private static native JavaScriptObject createComment(JavaScriptObject doc,
            String text) /*-{
        return doc.createComment(text);
    }-*/;

    /**
     * Appends a comment node to <code>parent</code>; a comment for the current
     * placeholder is additionally appended to the real script element.
     */
    @Override protected void appendComment(JavaScriptObject parent,
            String comment) throws SAXException {
        try {
            if (parent == placeholder) {
                appendChild(script, createComment(document, comment));
            }
            appendChild(parent, createComment(document, comment));
        } catch (JavaScriptException e) {
            fatal(e);
        }
    }

    /** Appends a comment node as a child of the document itself. */
    @Override protected void appendCommentToDocument(String comment)
            throws SAXException {
        try {
            appendChild(document, createComment(document, comment));
        } catch (JavaScriptException e) {
            fatal(e);
        }
    }

    private static native JavaScriptObject createElementNS(
            JavaScriptObject doc, String ns, String local) /*-{
        return doc.createElementNS(ns, local);
    }-*/;

    /**
     * Creates an element with the given attributes. For an element named
     * <code>script</code> the real element is remembered in {@link #script}
     * and a placeholder carrying the same attributes is returned instead; a
     * script/placeholder pair already in progress is pushed onto
     * {@link #scriptStack} first.
     *
     * @return the new element, or the placeholder for a script element
     */
    @Override protected JavaScriptObject createElement(String ns, String name,
            HtmlAttributes attributes) throws SAXException {
        try {
            JavaScriptObject rv = createElementNS(document, ns, name);
            copyAttributes(rv, attributes);
            // Reference comparison as in the original; presumably element
            // names reach the tree builder interned - TODO confirm.
            if ("script" == name) {
                if (placeholder != null) {
                    scriptStack.addLast(new ScriptHolder(script, placeholder));
                }
                script = rv;
                placeholder = createElementNS(document,
                        "http://n.validator.nu/placeholder/", "script");
                rv = placeholder;
                copyAttributes(rv, attributes);
            }
            return rv;
        } catch (JavaScriptException e) {
            fatal(e);
            throw new RuntimeException("Unreachable");
        }
    }

    /**
     * Creates the XHTML <code>html</code> element with the given attributes
     * and appends it to the document.
     *
     * @return the new root element
     */
    @Override protected JavaScriptObject createHtmlElementSetAsRoot(
            HtmlAttributes attributes) throws SAXException {
        try {
            JavaScriptObject rv = createElementNS(document,
                    "http://www.w3.org/1999/xhtml", "html");
            copyAttributes(rv, attributes);
            appendChild(document, rv);
            return rv;
        } catch (JavaScriptException e) {
            fatal(e);
            throw new RuntimeException("Unreachable");
        }
    }

    private static native JavaScriptObject getParentNode(
            JavaScriptObject element) /*-{
        return element.parentNode;
    }-*/;

    /**
     * Appends <code>child</code> to <code>newParent</code>; when the parent is
     * the current placeholder, a deep clone of the child is also appended to
     * the real script element.
     */
    @Override protected void appendElement(JavaScriptObject child,
            JavaScriptObject newParent) throws SAXException {
        try {
            if (newParent == placeholder) {
                appendChild(script, cloneNodeDeep(child));
            }
            appendChild(newParent, child);
        } catch (JavaScriptException e) {
            fatal(e);
        }
    }

    @Override protected boolean hasChildren(JavaScriptObject element)
            throws SAXException {
        try {
            return hasChildNodes(element);
        } catch (JavaScriptException e) {
            fatal(e);
            throw new RuntimeException("Unreachable");
        }
    }

    private static native void insertBeforeNative(JavaScriptObject parent,
            JavaScriptObject child, JavaScriptObject sibling) /*-{
        parent.insertBefore(child, sibling);
    }-*/;

    private static native int getNodeType(JavaScriptObject node) /*-{
        return node.nodeType;
    }-*/;

    private static native JavaScriptObject cloneNodeDeep(JavaScriptObject node) /*-{
        return node.cloneNode(true);
    }-*/;

    /**
     * Returns the document and drops this builder's reference to it.
     *
     * @return the document
     */
    JavaScriptObject getDocument() {
        JavaScriptObject rv = document;
        document = null;
        return rv;
    }

    private static native JavaScriptObject createDocumentFragment(
            JavaScriptObject doc) /*-{
        return doc.createDocumentFragment();
    }-*/;

    /**
     * Moves the children of the document's first child into a new document
     * fragment and drops this builder's reference to the document.
     *
     * @return the fragment; empty when the document has no children
     */
    JavaScriptObject getDocumentFragment() {
        JavaScriptObject rv = createDocumentFragment(document);
        JavaScriptObject rootElt = getFirstChild(document);
        // An empty document has no first child; return an empty fragment
        // instead of failing inside the native call.
        while (rootElt != null && hasChildNodes(rootElt)) {
            appendChild(rv, getFirstChild(rootElt));
        }
        document = null;
        return rv;
    }

    /**
     * Creates an element exactly like
     * {@link #createElement(String, String, HtmlAttributes)}; the form
     * pointer is currently not recorded on the element.
     *
     * @param form
     *            the form owner; ignored
     */
    @Override protected JavaScriptObject createElement(String ns, String name,
            HtmlAttributes attributes, JavaScriptObject form)
            throws SAXException {
        try {
            JavaScriptObject rv = createElement(ns, name, attributes);
            // rv.setUserData("nu.validator.form-pointer", form, null);
            return rv;
        } catch (JavaScriptException e) {
            fatal(e);
            return null;
        }
    }

    /**
     * Resets all script-tracking state at the start of a parse.
     *
     * @see nu.validator.htmlparser.impl.TreeBuilder#start()
     */
    @Override protected void start(boolean fragment) throws SAXException {
        script = null;
        placeholder = null;
        readyToRun = false;
        // Drop pairs left over from an earlier parse that did not finish.
        scriptStack.clear();
    }

    /**
     * Receives the document mode; currently a no-op because the mode is not
     * stored on the document.
     */
    protected void documentMode(DocumentMode mode, String publicIdentifier,
            String systemIdentifier, boolean html4SpecificAdditionalErrorChecks)
            throws SAXException {
        // document.setUserData("nu.validator.document-mode", mode, null);
    }

    /**
     * When the popped node is the current placeholder, marks the script as
     * ready and requests that tokenization be suspended.
     *
     * @see nu.validator.htmlparser.impl.TreeBuilder#elementPopped(java.lang.String,
     *      java.lang.String, java.lang.Object)
     */
    @Override protected void elementPopped(String ns, String name,
            JavaScriptObject node) throws SAXException {
        if (node == placeholder) {
            readyToRun = true;
            requestSuspension();
        }
    }

    private static native void replace(JavaScriptObject oldNode,
            JavaScriptObject newNode) /*-{
        oldNode.parentNode.replaceChild(newNode, oldNode);
    }-*/;

    private static native JavaScriptObject getPreviousSibling(JavaScriptObject node) /*-{
        return node.previousSibling;
    }-*/;

    /**
     * If a script became ready, replaces its placeholder in the DOM with the
     * real script element and then restores the previously saved
     * script/placeholder pair, if any.
     */
    void maybeRunScript() {
        if (readyToRun) {
            readyToRun = false;
            // The swap happens before the state below is restored; statement
            // order is kept exactly as in the original.
            replace(placeholder, script);
            if (scriptStack.isEmpty()) {
                script = null;
                placeholder = null;
            } else {
                ScriptHolder scriptHolder = scriptStack.removeLast();
                script = scriptHolder.getScript();
                placeholder = scriptHolder.getPlaceholder();
            }
        }
    }

    /**
     * Inserts foster-parented text before <code>table</code> when the table
     * has a parent, otherwise at the end of <code>stackParent</code>. In both
     * cases an adjacent text node is extended instead of creating a new one.
     */
    @Override protected void insertFosterParentedCharacters(String text,
            JavaScriptObject table, JavaScriptObject stackParent)
            throws SAXException {
        try {
            JavaScriptObject parent = getParentNode(table);
            if (parent != null) { // always an element if not null
                JavaScriptObject previousSibling = getPreviousSibling(table);
                if (previousSibling != null
                        && getNodeType(previousSibling) == TEXT_NODE) {
                    extendTextNode(previousSibling, text);
                    return;
                }
                insertBeforeNative(parent, createTextNode(document, text), table);
                return;
            }
            JavaScriptObject lastChild = getLastChild(stackParent);
            if (lastChild != null && getNodeType(lastChild) == TEXT_NODE) {
                extendTextNode(lastChild, text);
                return;
            }
            appendChild(stackParent, createTextNode(document, text));
        } catch (JavaScriptException e) {
            fatal(e);
        }
    }

    /**
     * Inserts a foster-parented node before <code>table</code> when the table
     * has an element parent, otherwise appends it to <code>stackParent</code>.
     */
    @Override protected void insertFosterParentedChild(JavaScriptObject child,
            JavaScriptObject table, JavaScriptObject stackParent)
            throws SAXException {
        try {
            // Read the parent inside the try so that a DOM failure is routed
            // through fatal() like in every other method of this class.
            JavaScriptObject parent = getParentNode(table);
            if (parent != null && getNodeType(parent) == ELEMENT_NODE) {
                insertBeforeNative(parent, child, table);
            } else {
                appendChild(stackParent, child);
            }
        } catch (JavaScriptException e) {
            fatal(e);
        }
    }

    private static native void removeChild(JavaScriptObject parent,
            JavaScriptObject child) /*-{
        parent.removeChild(child);
    }-*/;

    /** Removes <code>element</code> from its parent, if it has one. */
    @Override protected void detachFromParent(JavaScriptObject element)
            throws SAXException {
        try {
            JavaScriptObject parent = getParentNode(element);
            if (parent != null) {
                removeChild(parent, element);
            }
        } catch (JavaScriptException e) {
            fatal(e);
        }
    }
}
@@ -0,0 +1,265 @@
/*
* Copyright (c) 2007 Henri Sivonen
* Copyright (c) 2007-2008 Mozilla Foundation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
package nu.validator.htmlparser.gwt;
import java.util.LinkedList;
import nu.validator.htmlparser.common.XmlViolationPolicy;
import nu.validator.htmlparser.impl.ErrorReportingTokenizer;
import nu.validator.htmlparser.impl.Tokenizer;
import nu.validator.htmlparser.impl.UTF16Buffer;
import org.xml.sax.ErrorHandler;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
import com.google.gwt.core.client.JavaScriptObject;
import com.google.gwt.user.client.Timer;
/**
 * This class implements an HTML5 parser that builds its result directly in a
 * browser DOM document through {@link BrowserTreeBuilder}.
 *
 * <p>The constructor sets the name policy of the tree builder and the
 * comment, content non-XML character, content space, name and xmlns policies
 * of the tokenizer to <code>ALTER_INFOSET</code>.
 *
 * <p>The source string is tokenized in chunks of at most
 * <code>CHUNK_SIZE</code> UTF-16 code units. After each chunk a 1 ms timer is
 * scheduled to continue, so parsing completes asynchronously and the
 * {@link ParseEndListener} passed to {@link #parse(String, ParseEndListener)}
 * is notified at the end.
 *
 * <p>The tree builder used here currently stores neither the document mode
 * nor the form pointer on the DOM nodes.
 *
 * @version $Id: HtmlDocumentBuilder.java 255 2008-05-29 08:57:38Z hsivonen $
 * @author hsivonen
 */
public class HtmlParser {

    /** Maximum number of UTF-16 code units exposed to the tokenizer per step. */
    private static final int CHUNK_SIZE = 512;

    private final Tokenizer tokenizer;

    private final BrowserTreeBuilder domTreeBuilder;

    /** Pending text that is pushed as a buffer at the start of the next pump. */
    private final StringBuilder documentWriteBuffer = new StringBuilder();

    private ErrorHandler errorHandler;

    /** Buffer over the whole source; its end is advanced chunk by chunk. */
    private UTF16Buffer stream;

    /** Total length of the source in UTF-16 code units. */
    private int streamLength;

    /** Value returned by the last <code>tokenizeBuffer</code> call. */
    private boolean lastWasCR;

    /** Set once EOF has been signalled; the next pump finishes the parse. */
    private boolean ending;

    private ParseEndListener parseEndListener;

    /** Stack of input buffers; the topmost one is tokenized first. */
    private final LinkedList<UTF16Buffer> bufferStack = new LinkedList<UTF16Buffer>();

    /**
     * Instantiates the parser.
     *
     * @param document
     *            the DOM document the parse result is built into
     */
    public HtmlParser(JavaScriptObject document) {
        this.domTreeBuilder = new BrowserTreeBuilder(document);
        this.tokenizer = new ErrorReportingTokenizer(domTreeBuilder);
        this.domTreeBuilder.setNamePolicy(XmlViolationPolicy.ALTER_INFOSET);
        this.tokenizer.setCommentPolicy(XmlViolationPolicy.ALTER_INFOSET);
        this.tokenizer.setContentNonXmlCharPolicy(XmlViolationPolicy.ALTER_INFOSET);
        this.tokenizer.setContentSpacePolicy(XmlViolationPolicy.ALTER_INFOSET);
        this.tokenizer.setNamePolicy(XmlViolationPolicy.ALTER_INFOSET);
        this.tokenizer.setXmlnsPolicy(XmlViolationPolicy.ALTER_INFOSET);
    }

    /**
     * Starts parsing a complete document from a string. The first chunk is
     * tokenized before this method returns; the rest is processed from timer
     * callbacks.
     *
     * @param source
     *            the markup to parse
     * @param callback
     *            notified when parsing has finished; may be <code>null</code>
     * @throws SAXException
     *             if tokenizing the first chunk fails
     */
    public void parse(String source, ParseEndListener callback) throws SAXException {
        parseEndListener = callback;
        domTreeBuilder.setFragmentContext(null);
        tokenize(source, null);
    }

    /**
     * Resets the parser state, wraps the source in the stream buffer and
     * starts pumping.
     *
     * @param source
     *            the markup to tokenize
     * @param context
     *            the fragment context name, or <code>null</code> for a full
     *            document
     * @throws SAXException
     *             if tokenizing the first chunk fails
     */
    private void tokenize(String source, String context) throws SAXException {
        lastWasCR = false;
        ending = false;
        documentWriteBuffer.setLength(0);
        streamLength = source.length();
        // Expose only the first chunk; pump() advances the end later.
        stream = new UTF16Buffer(source.toCharArray(), 0,
                (streamLength < CHUNK_SIZE ? streamLength : CHUNK_SIZE));
        bufferStack.clear();
        push(stream);
        domTreeBuilder.setFragmentContext(context == null ? null : context.intern());
        tokenizer.start();
        pump();
    }

    /**
     * Performs one parsing step: either finishes the parse (when EOF was
     * signalled by the previous step) or tokenizes the topmost non-empty
     * buffer once and schedules the next step.
     *
     * @throws SAXException
     *             if the tokenizer or tree builder reports a fatal error
     */
    private void pump() throws SAXException {
        if (ending) {
            tokenizer.end();
            domTreeBuilder.getDocument(); // drops the internal reference
            // Tolerate callers that passed no listener to parse().
            if (parseEndListener != null) {
                parseEndListener.parseComplete();
            }
            // Don't schedule timeout
            return;
        }

        int docWriteLen = documentWriteBuffer.length();
        if (docWriteLen > 0) {
            char[] newBuf = new char[docWriteLen];
            documentWriteBuffer.getChars(0, docWriteLen, newBuf, 0);
            push(new UTF16Buffer(newBuf, 0, docWriteLen));
            documentWriteBuffer.setLength(0);
        }

        for (;;) {
            UTF16Buffer buffer = peek();
            if (!buffer.hasMore()) {
                if (buffer == stream) {
                    if (buffer.getEnd() == streamLength) {
                        // Stop parsing
                        tokenizer.eof();
                        ending = true;
                        break;
                    } else {
                        // Expose the next chunk of the source.
                        int newEnd = buffer.getStart() + CHUNK_SIZE;
                        buffer.setEnd(newEnd < streamLength ? newEnd
                                : streamLength);
                        continue;
                    }
                } else {
                    // An exhausted pushed buffer; fall back to the one below.
                    pop();
                    continue;
                }
            }
            // now we have a non-empty buffer
            // NOTE(review): adjust() presumably skips a line feed directly
            // after a carriage return that ended the previous buffer - confirm
            // against UTF16Buffer.
            buffer.adjust(lastWasCR);
            lastWasCR = false;
            if (buffer.hasMore()) {
                lastWasCR = tokenizer.tokenizeBuffer(buffer);
                domTreeBuilder.maybeRunScript();
                break;
            } else {
                continue;
            }
        }

        // schedule
        Timer timer = new Timer() {
            @Override public void run() {
                try {
                    pump();
                } catch (SAXException e) {
                    ending = true;
                    if (errorHandler != null) {
                        try {
                            errorHandler.fatalError(new SAXParseException(
                                    e.getMessage(), null, null, -1, -1, e));
                        } catch (SAXException ignored) {
                            // Deliberately ignored: this runs in a timer
                            // callback, so there is no caller to rethrow to.
                        }
                    }
                }
            }
        };
        timer.schedule(1);
    }

    private void push(UTF16Buffer buffer) {
        bufferStack.addLast(buffer);
    }

    private UTF16Buffer peek() {
        return bufferStack.getLast();
    }

    private void pop() {
        bufferStack.removeLast();
    }

    /**
     * Tokenizes the given text immediately and completely, swapping in any
     * script that becomes ready along the way.
     *
     * @param text
     *            the markup to tokenize
     * @throws SAXException
     *             if the tokenizer or tree builder reports a fatal error
     */
    public void documentWrite(String text) throws SAXException {
        UTF16Buffer buffer = new UTF16Buffer(text.toCharArray(), 0, text.length());
        while (buffer.hasMore()) {
            buffer.adjust(lastWasCR);
            lastWasCR = false;
            if (buffer.hasMore()) {
                lastWasCR = tokenizer.tokenizeBuffer(buffer);
                domTreeBuilder.maybeRunScript();
            }
        }
    }

    /**
     * Sets the error handler on this parser, its tree builder and its
     * tokenizer.
     *
     * @param errorHandler
     *            the handler, or <code>null</code> for none
     */
    public void setErrorHandler(ErrorHandler errorHandler) {
        this.errorHandler = errorHandler;
        domTreeBuilder.setErrorHandler(errorHandler);
        tokenizer.setErrorHandler(errorHandler);
    }

    /**
     * Sets whether comment nodes appear in the tree.
     * @param ignoreComments <code>true</code> to ignore comments
     * @see nu.validator.htmlparser.impl.TreeBuilder#setIgnoringComments(boolean)
     */
    public void setIgnoringComments(boolean ignoreComments) {
        domTreeBuilder.setIgnoringComments(ignoreComments);
    }

    /**
     * Sets whether the parser considers scripting to be enabled for noscript treatment.
     * @param scriptingEnabled <code>true</code> to enable
     * @see nu.validator.htmlparser.impl.TreeBuilder#setScriptingEnabled(boolean)
     */
    public void setScriptingEnabled(boolean scriptingEnabled) {
        domTreeBuilder.setScriptingEnabled(scriptingEnabled);
    }
}
@@ -0,0 +1,87 @@
/*
* Copyright (c) 2008 Mozilla Foundation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
package nu.validator.htmlparser.gwt;
import org.xml.sax.SAXException;
import com.google.gwt.core.client.EntryPoint;
import com.google.gwt.core.client.JavaScriptObject;
/**
 * GWT entry point that exports a <code>parseHtmlDocument</code> function on
 * the host window.
 */
public class HtmlParserModule implements EntryPoint {

    /** Removes every child of <code>node</code>, last child first. */
    private static native void zapChildren(JavaScriptObject node) /*-{
        while (node.hasChildNodes()) {
            node.removeChild(node.lastChild);
        }
    }-*/;

    /**
     * Replaces <code>write</code> and <code>writeln</code> on the document
     * with functions that feed the concatenated arguments to
     * {@link HtmlParser#documentWrite(String)}.
     *
     * @param doc
     *            the document to patch
     * @param parser
     *            the parser receiving the written text
     */
    private static native void installDocWrite(JavaScriptObject doc, HtmlParser parser) /*-{
        // Converts each argument to a string before concatenating. Starting
        // from the raw first argument would add numbers numerically and could
        // hand a non-string to the Java String parameter.
        var concat = function (args) {
            var text = "";
            for (var i = 0; i < args.length; i++) {
                text += args[i];
            }
            return text;
        };
        doc.write = function() {
            if (arguments.length == 0) {
                return;
            }
            parser.@nu.validator.htmlparser.gwt.HtmlParser::documentWrite(Ljava/lang/String;)(concat(arguments));
        }
        doc.writeln = function() {
            // With no arguments this writes just the line feed.
            parser.@nu.validator.htmlparser.gwt.HtmlParser::documentWrite(Ljava/lang/String;)(concat(arguments) + "\n");
        }
    }-*/;

    /**
     * Empties <code>document</code>, installs the <code>write</code> and
     * <code>writeln</code> replacements on it and starts parsing
     * <code>source</code> into it with scripting enabled.
     *
     * @param source
     *            the markup to parse
     * @param document
     *            the target DOM document
     * @param readyCallback
     *            function called when parsing has finished; a fresh function
     *            object is substituted when <code>null</code>
     * @param errorHandler
     *            currently ignored
     * @throws SAXException
     *             if the synchronous first parsing step fails
     */
    @SuppressWarnings("unused")
    private static void parseHtmlDocument(String source, JavaScriptObject document, JavaScriptObject readyCallback, JavaScriptObject errorHandler) throws SAXException {
        if (readyCallback == null) {
            readyCallback = JavaScriptObject.createFunction();
        }
        zapChildren(document);
        HtmlParser parser = new HtmlParser(document);
        parser.setScriptingEnabled(true);
        // XXX error handler
        installDocWrite(document, parser);
        parser.parse(source, new ParseEndListener(readyCallback));
    }

    /** Publishes {@code parseHtmlDocument} as <code>$wnd.parseHtmlDocument</code>. */
    private static native void exportEntryPoints() /*-{
        $wnd.parseHtmlDocument = @nu.validator.htmlparser.gwt.HtmlParserModule::parseHtmlDocument(Ljava/lang/String;Lcom/google/gwt/core/client/JavaScriptObject;Lcom/google/gwt/core/client/JavaScriptObject;Lcom/google/gwt/core/client/JavaScriptObject;);
    }-*/;

    /** Exports the entry points when the module loads. */
    public void onModuleLoad() {
        exportEntryPoints();
    }
}
@@ -0,0 +1,46 @@
/*
* Copyright (c) 2008 Mozilla Foundation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
package nu.validator.htmlparser.gwt;
import com.google.gwt.core.client.JavaScriptObject;
/**
 * Wraps a JavaScript function that is invoked once parsing has finished.
 */
public class ParseEndListener {

    /** The JavaScript function to invoke. */
    private final JavaScriptObject callback;

    /**
     * Creates a listener around the given JavaScript function.
     *
     * @param callback
     *            the function called with no arguments from
     *            {@link #parseComplete()}
     */
    public ParseEndListener(JavaScriptObject callback) {
        this.callback = callback;
    }

    /** Invokes the wrapped JavaScript function with no arguments. */
    public void parseComplete() {
        invoke(this.callback);
    }

    /**
     * Calls <code>fn</code> as a JavaScript function.
     *
     * @param fn
     *            the function object to call
     */
    private static native void invoke(JavaScriptObject fn) /*-{
        fn();
    }-*/;
}
@@ -0,0 +1,225 @@
<!DOCTYPE HTML>
<html>
<head>
<title>Live DOM Viewer</title>
<script type="text/javascript" language="javascript" src="nu.validator.htmlparser.HtmlParser.nocache.js"></script>
<style>
h1 { margin: 0; }
h2 { font-size: small; margin: 1em 0 0; }
p, ul, pre { margin: 0; }
p { border: inset thin; }
textarea { width: 100%; -width: 99%; height: 8em; border: 0; }
iframe { width: 100%; height: 12em; border: 0; }
/* iframe.large { height: 24em; } */
pre { border: inset thin; padding: 0.5em; color: gray; }
pre samp { color: black; }
#dom { border: inset thin; padding: 0.5em 0.5em 0.5em 1em; color: black; min-height: 5em; font-family: monospace; background: white; }
#dom ul { padding: 0 0 0 1em; margin: 0; }
#dom li { padding: 0; margin: 0; list-style: none; position: relative; }
#dom li li { list-style: disc; }
#dom .t1 code { color: purple; font-weight: bold; }
#dom .t2 { font-style: normal; font-family: monospace; }
#dom .t2 .name { color: black; font-weight: bold; }
#dom .t2 .value { color: blue; font-weight: normal; }
#dom .t3 code, #dom .t4 code, #dom .t5 code { color: gray; }
#dom .t7 code, #dom .t8 code { color: green; }
#dom span { font-style: italic; font-family: serif; }
#dom .t10 code { color: teal; }
#dom .misparented, #dom .misparented code { color: red; font-weight: bold; }
#dom.hidden, .hidden { visibility: hidden; margin: 0.5em 0; padding: 0; height: 0; min-height: 0; }
pre#log { color: black; font: small monospace; }
script + p { border: none; font-size: smaller; margin: 0.8em 0.3em; }
</style>
<style title="Tree View">
#dom li li { list-style: none; }
#dom li:first-child::before { position: absolute; top: 0; height: 0.6em; left: -0.75em; width: 0.5em; border-style: none none solid solid; content: ''; border-width: 0.1em; }
#dom li:not(:last-child)::after { position: absolute; top: 0; bottom: -0.6em; left: -0.75em; width: 0.5em; border-style: none none solid solid; content: ''; border-width: 0.1em; }
</style>
<script>
// When the user agent string reports Gecko build 20060217, remove the second
// <style> element of the page (the one titled "Tree View").
// NOTE(review): the '00000000' comparison is always true once the first
// comparison has matched.
if (navigator.userAgent.match('Gecko/(\\d+)') && RegExp.$1 == '20060217' && RegExp.$1 != '00000000') {
var style = document.getElementsByTagName('style')[1];
style.parentNode.removeChild(style);
}
</script>
</head>
<body onload="init()">
<h1>Live DOM Viewer</h1>
<h2>Markup to test (<a href="data:," id="permalink" rel="bookmark">permalink</a>, <a href="javascript:up()">upload</a>, <a href="javascript:down()">download</a>, <a href="#" onclick="toggleVisibility(this); return false">hide</a>): <span id="updown-status"></span></h2>
<p><textarea oninput="updateInput(event)" onkeydown="updateInput(event)">&lt;!DOCTYPE html>
...</textarea></p>
<h2><a href="data:," id="domview">DOM view</a> (<a href="#" onclick="toggleVisibility(this); return false;">hide</a>, <a href="#" onclick="updateDOM()">refresh</a>):</h2>
<ul id="dom"></ul>
<h2><a href="data:," id="link">Rendered view</a>: (<a href="#" onclick="toggleVisibility(this); return false;">hide</a><!--, <a href="#" onclick="grow(this)">grow</a>-->):</h2>
<p><iframe src="blank.html"></iframe></p> <!-- data:, -->
<h2>innerHTML view: (<a href="#" onclick="toggleVisibility(this); return false;">show</a>, <a href="#" onclick="updateDOM()">refresh</a>):</h2>
<pre class="hidden">&lt;!DOCTYPE HTML>&lt;html><samp></samp>&lt;/html></pre>
<h2>Log: (<a href="#" onclick="toggleVisibility(this); return false;">hide</a>):</h2>
<pre id="log">Script not loaded.</pre>
<script>
// Page elements the functions below work on.
var iframe = document.getElementsByTagName('iframe')[0]; // rendered view; its document is the parse target
var textarea = document.getElementsByTagName('textarea')[0]; // markup to test
var pre = document.getElementsByTagName('samp')[0]; // <samp> inside the innerHTML view
var dom = document.getElementsByTagName('ul')[0]; // root list of the DOM view
var log = document.getElementById('log');
var updownStatus = document.getElementById('updown-status');
// Timer id of the pending debounced update() call, or 0 when none is pending.
var delayedUpdater = 0;
// Textarea content as of the last completed parse (set in afterParse).
var lastString = '';
// Log text that updateDOM() writes into the log element.
var logBuffer = '';
// Set to true by update() and back to false by updateDOM().
var logBuffering = false;
// Input handler for the textarea: (re)starts a 100 ms timer so that update()
// runs only after typing has paused.
function updateInput(event) {
  // Cancel a still-pending update before arming a new one.
  if (delayedUpdater) {
    clearTimeout(delayedUpdater);
  }
  delayedUpdater = setTimeout(update, 100);
}
// Completion callback passed to parseHtmlDocument() by update().
function afterParse() {
// Remember the text that was parsed so update() can skip unchanged input.
lastString = textarea.value;
// Refresh the DOM, innerHTML and log views shortly afterwards.
setTimeout(updateDOM, 100);
// Calls updown with an empty string (presumably clears the upload/download status; updown is defined outside this view).
updown('');
}
// Re-parses the textarea content into the iframe document when it differs
// from the last parsed text.
function update() {
  var markup = textarea.value;
  if (lastString == markup) {
    return;
  }
  logBuffering = true;
  document.getElementById('link').href = 'data:text/html;charset=utf-8,' + encodeURIComponent(markup);
  // Route script errors and w() calls made inside the iframe to the log.
  var frameWindow = iframe.contentWindow;
  frameWindow.onerror = function (a, b, c) {
    record('error: ' + a + ' on line ' + c);
  };
  frameWindow.w = function (s) {
    record('log: ' + s);
  };
  window.parseHtmlDocument(markup, frameWindow.document, afterParse, null);
}
// Refreshes the innerHTML view, the DOM view, the related links and the log
// from the current state of the iframe document.
function updateDOM() {
// Replace the content of the innerHTML view with the document element's innerHTML.
while (pre.firstChild) pre.removeChild(pre.firstChild);
pre.appendChild(document.createTextNode(iframe.contentWindow.document.documentElement.innerHTML));
// Rebuild the DOM tree view.
printDOM(dom, iframe.contentWindow.document);
// Point the "DOM view" link at the tree markup and the permalink at the current markup.
document.getElementById('domview').href = 'data:text/plain;charset=utf-8,<ul class="domTree">' + encodeURIComponent(dom.innerHTML + '</ul>');
document.getElementById('permalink').href = '?' + encodeURIComponent(textarea.value);
// record() is defined outside this view; presumably it appends to logBuffer while logBuffering is true - confirm.
record('rendering mode: ' + iframe.contentWindow.document.compatMode);
if (iframe.contentWindow.document.title)
record('document.title: ' + iframe.contentWindow.document.title);
else
record('document has no title');
// Reduce the log element to its first child and write the buffered text into it.
while (log.firstChild != log.lastChild)
log.removeChild(log.lastChild);
log.firstChild.data = logBuffer;
// Stop buffering and start over with an empty buffer.
logBuffering = false;
logBuffer = '';
}
// Renders the children of `node` as <li> items inside `ul`, recursing into
// each child that has children of its own. Existing content of `ul` is
// removed first.
function printDOM(ul, node) {
  while (ul.firstChild) ul.removeChild(ul.firstChild);
  var children = node.childNodes;
  for (var i = 0; i < children.length; i += 1) {
    var child = children[i];
    var li = document.createElement('li');
    // The class encodes the node type (t1, t3, ...) for styling.
    li.className = 't' + child.nodeType;
    if (child.nodeType == 10) {
      li.appendChild(document.createTextNode('DOCTYPE: '));
    }
    var code = document.createElement('code');
    code.appendChild(document.createTextNode(child.nodeName));
    li.appendChild(code);
    // Nodes with a value get it appended after the name.
    if (child.nodeValue) {
      var span = document.createElement('span');
      span.appendChild(document.createTextNode(child.nodeValue));
      li.appendChild(document.createTextNode(': '));
      li.appendChild(span);
    }
    var attrs = child.attributes;
    if (attrs) {
      for (var j = 0; j < attrs.length; j += 1) {
        var attr = attrs[j];
        if (!attr.specified) {
          continue;
        }
        var attName = document.createElement('code');
        attName.appendChild(document.createTextNode(attr.nodeName));
        attName.className = 'attribute name';
        var attValue = document.createElement('code');
        attValue.appendChild(document.createTextNode(attr.nodeValue));
        attValue.className = 'attribute value';
        var att = document.createElement('span');
        att.className = 't2';
        att.appendChild(attName);
        att.appendChild(document.createTextNode('="'));
        att.appendChild(attValue);
        att.appendChild(document.createTextNode('"'));
        li.appendChild(document.createTextNode(' '));
        li.appendChild(att);
      }
    }
    if (child.parentNode != node) {
      // The child's parentNode disagrees with the node that lists it.
      li.className += ' misparented';
    } else if (child.childNodes.length) {
      var ul2 = document.createElement('ul');
      li.appendChild(ul2);
      printDOM(ul2, child);
    }
    ul.appendChild(li);
  }
}
// Shows or hides the element that follows the parent of `link` by toggling
// its "hidden" class, and updates the link text to name the next action
// ("show" while hidden, "hide" while visible).
function toggleVisibility(link) {
  var target = link.parentNode.nextSibling;
  // we should always do this but in IE, text nodes vanish
  if (target.nodeType == 3 /* text node */) {
    target = target.nextSibling;
  }
  var hideIt = target.className != "hidden";
  target.className = hideIt ? 'hidden' : '';
  link.firstChild.data = hideIt ? "show" : "hide";
}
/*
function grow(link) {
var n = link.parentNode.nextSibling;
if (n.nodeType == 3 /-* text node *-/) n = n.nextSibling; // we should always do this but in IE, text nodes vanish
n.className = (n.className == "large") ? '' : 'large';
link.firstChild.data = n.className == "grow" ? "shrink" : "grow";
}
*/
// Issues an asynchronous GET for clipboard.cgi and, when it succeeds, puts the
// response text into the textarea and calls update(). Progress and the final
// outcome are reported through updown().
function down() {
  updown('downloading...');
  var request = window.XMLHttpRequest ? new XMLHttpRequest() : new ActiveXObject("Microsoft.XMLHTTP");
  request.onreadystatechange = function () {
    updown('downloading... ' + request.readyState + '/4');
    if (request.readyState != 4) return;
    if (request.status >= 200 && request.status < 300) {
      textarea.value = request.responseText;
      update();
      updown('downloaded');
    } else {
      // A failed request must not overwrite the user's input with an error
      // page (or with nothing), nor be reported as a successful download.
      updown('download failed (HTTP status ' + request.status + ')');
    }
  };
  request.open('GET', 'clipboard.cgi', true);
  request.send(null);
}
// Issues an asynchronous POST of the textarea contents to clipboard.cgi as
// text/plain. Progress and the final outcome are reported through updown().
function up() {
  updown('uploading...');
  var request = window.XMLHttpRequest ? new XMLHttpRequest() : new ActiveXObject("Microsoft.XMLHTTP");
  request.onreadystatechange = function () {
    updown('uploading... ' + request.readyState + '/4');
    if (request.readyState != 4) return;
    // Only claim success when the server actually accepted the request;
    // readyState 4 is also reached for error responses and network failures.
    if (request.status >= 200 && request.status < 300) {
      updown('uploaded');
    } else {
      updown('upload failed (HTTP status ' + request.status + ')');
    }
  };
  request.open('POST', 'clipboard.cgi', true);
  request.setRequestHeader('Content-Type', 'text/plain');
  request.send(textarea.value);
}
// Seeds the textarea from the page's query string (everything after the '?'),
// when there is one, and then runs update().
function init() {
  var query = location.search;
  if (query) {
    var encoded = query.substring(1);
    try {
      textarea.value = decodeURIComponent(encoded);
    } catch (e) {
      // decodeURIComponent throws URIError on malformed escapes (for example
      // a truncated or hand-edited link). Fall back to the raw text so the
      // user can see what was passed and update() below still runs.
      textarea.value = encoded;
    }
  }
  update();
}
// Adds one line (terminated by CR LF) to the log. While `logBuffering` is set
// the line is accumulated in `logBuffer`; otherwise it is appended to the
// `log` element as a new text node.
function record(s) {
  var line = s + '\r\n';
  if (!logBuffering) {
    log.appendChild(document.createTextNode(line));
    return;
  }
  logBuffer += line;
}
// Replaces the entire content of the `updownStatus` element with a single
// text node holding the status message `s`.
function updown(s) {
  var stale;
  while ((stale = updownStatus.firstChild)) {
    updownStatus.removeChild(stale);
  }
  var message = document.createTextNode(s);
  updownStatus.appendChild(message);
}
</script>
<p>This script puts a function <code>w(<var>s</var>)</code> into the
global scope of the test page, where <var>s</var> is a string to
output to the log. Also, five files are accessible in the current
directory for test purposes: <code>image</code> (a GIF image),
<code>flash</code> (a Flash file), <code>script</code> (a JS file),
<code>style</code> (a CSS file), and <code>document</code> (an HTML
file).</p>
</body>
</html>
@@ -0,0 +1,25 @@
From:
http://software.hixie.ch/utilities/js/live-dom-viewer/LICENSE
regarding the upstream of HtmlParser.html:
The MIT License
Copyright (c) 2000, 2006, 2008 Ian Hickson and various contributors
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
@@ -0,0 +1,2 @@
<!DOCTYPE html>
<title></title>