Add java htmlparser sources that match the original 52-level state

https://hg.mozilla.org/projects/htmlparser/
Commit: abe62ab2a9b69ccb3b5d8a231ec1ae11154c571d
This commit is contained in:
Matt A. Tobin
2020-01-15 14:56:04 -05:00
parent 09314667a6
commit 6168dbe21f
248 changed files with 62553 additions and 0 deletions
@@ -0,0 +1,115 @@
/*
* Copyright (c) 2007 Henri Sivonen
* Copyright (c) 2008 Mozilla Foundation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
package nu.validator.htmlparser.test;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.CodingErrorAction;
import nu.validator.htmlparser.common.Heuristics;
import nu.validator.htmlparser.io.Encoding;
import nu.validator.htmlparser.io.HtmlInputStreamReader;
import org.xml.sax.ErrorHandler;
import org.xml.sax.SAXException;
/**
 * Round-trips a long run of astral (supplementary-plane) characters through
 * {@link HtmlInputStreamReader} and fails if the decoded characters differ
 * from the characters that were encoded.
 *
 * <p>The input is a U+FEFF character, 0 to 3 ASCII padding characters and
 * {@code NUMBER_OF_ASTRAL_CHARS} surrogate pairs, encoded as UTF-8. The
 * varying padding shifts where the multi-byte sequences fall relative to the
 * reader's internal buffer boundaries.
 */
public class DecoderLoopTester {

    /** Added to {@code codePoint >> 10} to obtain the lead (high) surrogate. */
    private static final int LEAD_OFFSET = 0xD800 - (0x10000 >> 10);

    /** Number of astral characters (surrogate pairs) in the generated input. */
    private static final int NUMBER_OF_ASTRAL_CHARS = 24500;

    /**
     * Builds the test input with the given amount of padding, encodes it as
     * UTF-8 and checks it against two readers: one constructed with
     * {@code Heuristics.NONE} and one constructed with an explicit UTF-8
     * encoding.
     *
     * @param padding number of {@code 'x'} characters inserted between the
     *        leading U+FEFF and the astral characters
     * @throws SAXException if the reader reports one
     * @throws IOException if encoding or reading fails
     */
    private void runTest(int padding) throws SAXException, IOException {
        Encoding utf8 = Encoding.forName("UTF-8");
        char[] charArr = new char[1 + padding + 2 * NUMBER_OF_ASTRAL_CHARS];
        int i = 0;
        charArr[i++] = '\uFEFF';
        for (int j = 0; j < padding; j++) {
            charArr[i++] = 'x';
        }
        for (int j = 0; j < NUMBER_OF_ASTRAL_CHARS; j++) {
            // Split code point 0x10000 + j into its lead and trail surrogates.
            int value = 0x10000 + j;
            charArr[i++] = (char) (LEAD_OFFSET + (value >> 10));
            charArr[i++] = (char) (0xDC00 + (value & 0x3FF));
        }
        CharBuffer charBuffer = CharBuffer.wrap(charArr);
        CharsetEncoder enc = utf8.newEncoder();
        // Any encoding problem should abort the test instead of being papered over.
        enc.onMalformedInput(CodingErrorAction.REPORT);
        enc.onUnmappableCharacter(CodingErrorAction.REPORT);
        ByteBuffer byteBuffer = enc.encode(charBuffer);
        byte[] byteArr = new byte[byteBuffer.limit()];
        byteBuffer.get(byteArr);

        ErrorHandler eh = new SystemErrErrorHandler();
        compare(new HtmlInputStreamReader(new ByteArrayInputStream(byteArr), eh, null, null, Heuristics.NONE), padding, charArr, byteArr);
        compare(new HtmlInputStreamReader(new ByteArrayInputStream(byteArr), eh, null, null, utf8), padding, charArr, byteArr);
    }

    /**
     * Drains {@code reader} and verifies that it yields exactly the characters
     * in {@code charArr}: no mismatch, no extra characters and no missing
     * characters.
     *
     * @param reader the reader under test
     * @param padding the padding used to build the input (not used here)
     * @param charArr the expected characters
     * @param byteArr the encoded input (not used here)
     * @throws SAXException if the reader reports one
     * @throws IOException if reading fails
     * @throws RuntimeException if the decoded output differs from {@code charArr}
     */
    private void compare(HtmlInputStreamReader reader, int padding, char[] charArr, byte[] byteArr) throws SAXException, IOException {
        char[] readBuffer = new char[2048];
        int offset = 0;
        int num = 0;
        int readNum = 0;
        while ((num = reader.read(readBuffer)) != -1) {
            for (int j = 0; j < num; j++) {
                // Progress trace: shows how far decoding got if the reader stalls.
                System.out.println(offset + j);
                if (offset + j >= charArr.length) {
                    // Report surplus output as a test failure rather than an
                    // ArrayIndexOutOfBoundsException.
                    throw new RuntimeException("Test failed. Reader produced more than the expected "
                            + charArr.length + " chars. j: " + j + " readNum: " + readNum);
                }
                if (readBuffer[j] != charArr[offset + j]) {
                    throw new RuntimeException("Test failed. Char: " + Integer.toHexString(readBuffer[j]) + " j: " + j + " readNum: " + readNum);
                }
            }
            offset += num;
            readNum++;
        }
        if (offset != charArr.length) {
            // Without this check a reader that stops early would pass the test.
            throw new RuntimeException("Test failed. Expected " + charArr.length
                    + " chars but the reader produced only " + offset + ".");
        }
    }

    /**
     * Runs the round-trip test with 0, 1, 2 and 3 padding characters.
     *
     * @throws SAXException if the reader reports one
     * @throws IOException if encoding or reading fails
     */
    void runTests() throws SAXException, IOException {
        for (int i = 0; i < 4; i++) {
            runTest(i);
        }
    }

    /**
     * Command-line entry point; takes no arguments.
     *
     * @param args ignored
     * @throws IOException if encoding or reading fails
     * @throws SAXException if the reader reports one
     */
    public static void main(String[] args) throws IOException, SAXException {
        new DecoderLoopTester().runTests();
    }
}
@@ -0,0 +1,49 @@
/*
* Copyright (c) 2008 Mozilla Foundation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
package nu.validator.htmlparser.test;
import java.io.IOException;
import java.io.StringReader;
import org.w3c.dom.Document;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import nu.validator.htmlparser.dom.HtmlDocumentBuilder;
/**
 * Small manual check that an element parsed by {@link HtmlDocumentBuilder}
 * can be looked up through {@code Document.getElementById}.
 */
public class DomIdTester {

    /** Markup containing one element with {@code id='foo'} and one with {@code class='foo'}. */
    private static final String testSrc = "<div><h1 id='bar' class='foo'>buoeoa</h1><p id='foo'>uoeuo</p></div>";

    /**
     * Parses the fixed test markup and prints the local name of the element
     * whose id is {@code foo}.
     *
     * @param args ignored
     * @throws SAXException if parsing fails
     * @throws IOException if reading the markup fails
     */
    public static void main(String[] args) throws SAXException, IOException {
        HtmlDocumentBuilder htmlBuilder = new HtmlDocumentBuilder();
        InputSource markup = new InputSource(new StringReader(testSrc));
        Document parsed = htmlBuilder.parse(markup);
        // A failed lookup returns null here and surfaces as a NullPointerException.
        String localName = parsed.getElementById("foo").getLocalName();
        System.out.println(localName);
    }
}
@@ -0,0 +1,40 @@
/*
* Copyright (c) 2009 Mozilla Foundation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
package nu.validator.htmlparser.test;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
/**
 * Scratch program that creates an XHTML-namespaced {@code html} element with
 * the JAXP DOM implementation and sets an {@code xmlns:foo} attribute on it
 * through the non-namespace-aware {@code setAttribute} call.
 */
public class DomTest {

    /**
     * Builds the element and attribute; produces no output.
     *
     * @param args ignored
     * @throws Exception if the DOM implementation rejects any step
     */
    public static void main(String[] args) throws Exception {
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        // not setting this causes pain and suffering with SVG
        factory.setNamespaceAware(true);
        DocumentBuilder domBuilder = factory.newDocumentBuilder();
        Document document = domBuilder.newDocument();
        Element root = document.createElementNS("http://www.w3.org/1999/xhtml", "html");
        root.setAttribute("xmlns:foo", "bar");
    }
}
@@ -0,0 +1,123 @@
/*
* Copyright (c) 2007 Henri Sivonen
* Copyright (c) 2008 Mozilla Foundation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
package nu.validator.htmlparser.test;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.Charset;
import nu.validator.htmlparser.common.Heuristics;
import nu.validator.htmlparser.io.Encoding;
import nu.validator.htmlparser.io.HtmlInputStreamReader;
import org.xml.sax.SAXException;
/**
 * Runs encoding-sniffing tests read from an aggregate test-data stream.
 *
 * <p>As consumed by this class, each test consists of a label line, the test
 * document (read through an {@code UntilHashInputStream}), another label
 * line, and one line naming the expected encoding. For every test the
 * character set chosen by {@link HtmlInputStreamReader} is compared with the
 * expected one and the outcome is printed to {@code System.err}.
 */
public class EncodingTester {

    /** Stream holding all tests back to back. */
    private final InputStream aggregateStream;

    /** Scratch buffer for the expected-encoding line of the current test. */
    private final StringBuilder builder = new StringBuilder();

    /**
     * Creates a tester reading from the given stream. The stream is not
     * closed by this class; that is the caller's responsibility.
     *
     * @param aggregateStream the stream holding the test data
     */
    public EncodingTester(InputStream aggregateStream) {
        this.aggregateStream = aggregateStream;
    }

    /**
     * Runs tests until {@link #runTest()} reports that no further test could
     * be read.
     *
     * @throws IOException if reading the test data fails
     * @throws SAXException if the reader under test reports one
     */
    private void runTests() throws IOException, SAXException {
        while (runTest()) {
            // spin
        }
    }

    /**
     * Reads and evaluates a single test.
     *
     * @return {@code true} if a test was run; {@code false} if the data ended
     *         (either cleanly before a test or prematurely inside one)
     * @throws IOException if reading the test data fails
     * @throws SAXException if the reader under test reports one
     */
    private boolean runTest() throws IOException, SAXException {
        if (skipLabel()) {
            // Clean end of data: nothing left to test.
            return false;
        }
        UntilHashInputStream stream = new UntilHashInputStream(aggregateStream);
        HtmlInputStreamReader reader = new HtmlInputStreamReader(stream, null,
                null, null, Heuristics.NONE);
        // Only the sniffed character set is of interest; nothing is decoded.
        Charset charset = reader.getCharset();
        stream.close();
        if (skipLabel()) {
            System.err.println("Premature end of test data.");
            return false;
        }
        // Collect the expected encoding name up to the end of the line.
        builder.setLength(0);
        loop: for (;;) {
            int b = aggregateStream.read();
            switch (b) {
                case '\n':
                    break loop;
                case -1:
                    System.err.println("Premature end of test data.");
                    return false;
                default:
                    builder.append(((char) b));
            }
        }
        String sniffed = charset.name();
        // Resolve the expected label through Encoding so both sides use the
        // charset's own name.
        String expected = Encoding.forName(builder.toString()).newDecoder().charset().name();
        if (expected.equalsIgnoreCase(sniffed)) {
            System.err.println("Success.");
        } else {
            System.err.println("Failure. Expected: " + expected + " got "
                    + sniffed + ".");
            System.err.println(stream);
        }
        return true;
    }

    /**
     * Consumes one label line from the aggregate stream, up to and including
     * its line feed.
     *
     * @return {@code true} if the end of the stream was reached before a line
     *         feed was seen; {@code false} if a full line was skipped
     * @throws IOException if reading the test data fails
     */
    private boolean skipLabel() throws IOException {
        // The first byte of the line is consumed without being inspected
        // beyond the end-of-stream check.
        int b = aggregateStream.read();
        if (b == -1) {
            return true;
        }
        for (;;) {
            b = aggregateStream.read();
            if (b == -1) {
                return true;
            } else if (b == 0x0A) {
                return false;
            }
        }
    }

    /**
     * Runs the tests in each file named on the command line, closing every
     * file once its tests have run (or failed).
     *
     * @param args paths of aggregate test-data files
     * @throws SAXException if the reader under test reports one
     * @throws IOException if a file cannot be opened, read or closed
     */
    public static void main(String[] args) throws IOException, SAXException {
        for (int i = 0; i < args.length; i++) {
            InputStream in = new FileInputStream(args[i]);
            try {
                EncodingTester tester = new EncodingTester(in);
                tester.runTests();
            } finally {
                // Previously the file handle was never released.
                in.close();
            }
        }
    }
}
@@ -0,0 +1,185 @@
/*
* Copyright (c) 2007 Henri Sivonen
* Copyright (c) 2008 Mozilla Foundation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
package nu.validator.htmlparser.test;
import nu.validator.htmlparser.common.TokenHandler;
import nu.validator.htmlparser.impl.ElementName;
import nu.validator.htmlparser.impl.HtmlAttributes;
import nu.validator.htmlparser.impl.Tokenizer;
import org.xml.sax.ErrorHandler;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
import com.sdicons.json.model.JSONArray;
import com.sdicons.json.model.JSONBoolean;
import com.sdicons.json.model.JSONNull;
import com.sdicons.json.model.JSONObject;
import com.sdicons.json.model.JSONString;
/**
 * Token handler that records the tokenizer's output as a {@link JSONArray}.
 *
 * <p>Each token becomes a nested array whose first element is a type string
 * ("DOCTYPE", "StartTag", "EndTag", "Comment" or "Character"); each reported
 * error becomes a bare "ParseError" string. Consecutive character callbacks
 * are merged into a single "Character" token, which is flushed whenever any
 * other token or an error is recorded.
 */
public class JSONArrayTokenHandler implements TokenHandler, ErrorHandler {

    private static final JSONString DOCTYPE = new JSONString("DOCTYPE");

    private static final JSONString START_TAG = new JSONString("StartTag");

    private static final JSONString END_TAG = new JSONString("EndTag");

    private static final JSONString COMMENT = new JSONString("Comment");

    private static final JSONString CHARACTER = new JSONString("Character");

    private static final JSONString PARSE_ERROR = new JSONString("ParseError");

    private static final char[] REPLACEMENT_CHARACTER = { '\uFFFD' };

    /** Pending character data not yet emitted as a token. */
    private final StringBuilder builder = new StringBuilder();

    /** Tokens recorded so far; created in {@link #startTokenization(Tokenizer)}. */
    private JSONArray array;

    private int contentModelFlag;

    private String contentModelElement;

    /**
     * Stores the tokenizer state and end-tag expectation to apply at the
     * start of the next tokenization.
     *
     * @param contentModelFlag the state to pass to the tokenizer
     * @param contentModelElement the expected end tag name; when {@code null}
     *        the tokenizer state is left untouched
     */
    public void setContentModelFlag(int contentModelFlag, String contentModelElement) {
        this.contentModelFlag = contentModelFlag;
        this.contentModelElement = contentModelElement;
    }

    /** Buffers character data; it is emitted later as one "Character" token. */
    public void characters(char[] buf, int start, int length)
            throws SAXException {
        builder.append(buf, start, length);
    }

    /** Creates a token array whose first element is the given type marker. */
    private static JSONArray newToken(JSONString type) {
        JSONArray token = new JSONArray();
        token.getValue().add(type);
        return token;
    }

    /** Appends the string to the token, or JSON null when the string is {@code null}. */
    private static void addNullable(JSONArray token, String text) {
        if (text == null) {
            token.getValue().add(JSONNull.NULL);
        } else {
            token.getValue().add(new JSONString(text));
        }
    }

    /** Emits the buffered character data, if any, as a "Character" token. */
    private void flushCharacters() {
        if (builder.length() == 0) {
            return;
        }
        JSONArray token = newToken(CHARACTER);
        token.getValue().add(new JSONString(builder.toString()));
        array.getValue().add(token);
        builder.setLength(0);
    }

    /** Records a "Comment" token holding the comment text. */
    public void comment(char[] buf, int start, int length) throws SAXException {
        flushCharacters();
        JSONArray token = newToken(COMMENT);
        String text = new String(buf, start, length);
        token.getValue().add(new JSONString(text));
        array.getValue().add(token);
    }

    /**
     * Records a "DOCTYPE" token: name, public identifier, system identifier
     * (JSON null when absent) and a boolean that is the negation of
     * {@code forceQuirks}.
     */
    public void doctype(String name, String publicIdentifier, String systemIdentifier, boolean forceQuirks) throws SAXException {
        flushCharacters();
        JSONArray token = newToken(DOCTYPE);
        token.getValue().add(new JSONString(name));
        addNullable(token, publicIdentifier);
        addNullable(token, systemIdentifier);
        token.getValue().add(new JSONBoolean(!forceQuirks));
        array.getValue().add(token);
    }

    /** Records an "EndTag" token holding the element name. */
    public void endTag(ElementName eltName) throws SAXException {
        String tagName = eltName.name;
        flushCharacters();
        JSONArray token = newToken(END_TAG);
        token.getValue().add(new JSONString(tagName));
        array.getValue().add(token);
    }

    /** Emits any character data still buffered at end of input. */
    public void eof() throws SAXException {
        flushCharacters();
    }

    /**
     * Starts a fresh token array and, if an end-tag expectation was set via
     * {@link #setContentModelFlag(int, String)}, applies the stored state to
     * the tokenizer.
     */
    public void startTokenization(Tokenizer self) throws SAXException {
        array = new JSONArray();
        if (contentModelElement == null) {
            return;
        }
        self.setStateAndEndTagExpectation(contentModelFlag, contentModelElement);
    }

    /**
     * Records a "StartTag" token: element name, an object mapping attribute
     * qualified names to values, and a trailing {@code true} only when the
     * tag is self-closing.
     */
    public void startTag(ElementName eltName, HtmlAttributes attributes,
            boolean selfClosing) throws SAXException {
        String tagName = eltName.name;
        flushCharacters();
        JSONArray token = newToken(START_TAG);
        token.getValue().add(new JSONString(tagName));
        JSONObject attributeMap = new JSONObject();
        for (int index = 0; index < attributes.getLength(); index++) {
            String qName = attributes.getQNameNoBoundsCheck(index);
            JSONString value = new JSONString(attributes.getValueNoBoundsCheck(index));
            attributeMap.getValue().put(qName, value);
        }
        token.getValue().add(attributeMap);
        if (selfClosing) {
            token.getValue().add(JSONBoolean.TRUE);
        }
        array.getValue().add(token);
    }

    /** Comments are always requested so they can be recorded. */
    public boolean wantsComments() throws SAXException {
        return true;
    }

    /** Records a "ParseError" marker; details of the error are not kept. */
    public void error(SAXParseException exception) throws SAXException {
        flushCharacters();
        array.getValue().add(PARSE_ERROR);
    }

    /** Fatal errors are not expected; always throws. */
    public void fatalError(SAXParseException exception) throws SAXException {
        throw new RuntimeException("Should never happen.");
    }

    /** Warnings are ignored. */
    public void warning(SAXParseException exception) throws SAXException {
    }

    /**
     * Returns the tokens recorded since the last tokenization started.
     *
     * @return the token array, or {@code null} if tokenization never started
     */
    public JSONArray getArray() {
        return array;
    }

    /** Nothing to do at the end of tokenization. */
    public void endTokenization() throws SAXException {
    }

    /** Buffers a U+FFFD replacement character as character data. */
    @Override public void zeroOriginatingReplacementCharacter()
            throws SAXException {
        builder.append(REPLACEMENT_CHARACTER, 0, 1);
    }

    /** CDATA sections are never allowed by this handler. */
    @Override public boolean cdataSectionAllowed() throws SAXException {
        return false;
    }

    /** No buffer needs to be pre-sized; does nothing. */
    @Override public void ensureBufferSpace(int inputLength)
            throws SAXException {
    }
}
@@ -0,0 +1,66 @@
/*
* Copyright (c) 2007 Henri Sivonen
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
package nu.validator.htmlparser.test;
import java.util.LinkedList;
import org.xml.sax.ErrorHandler;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
/**
 * SAX error handler that collects (non-fatal) errors as strings of the form
 * {@code "<column>: <message>"} and remembers whether a fatal error was
 * reported. Warnings are ignored.
 */
public class ListErrorHandler implements ErrorHandler {

    /** Set once a fatal error has been reported. */
    private boolean fatal;

    /** Recorded errors in the order they were reported. */
    private final LinkedList<String> errors = new LinkedList<String>();

    /** Records the error as its column number, a colon and space, and its message. */
    public void error(SAXParseException spe) throws SAXException {
        StringBuilder entry = new StringBuilder();
        entry.append(spe.getColumnNumber());
        entry.append(": ");
        entry.append(spe.getMessage());
        errors.add(entry.toString());
    }

    /** Marks the handler as having seen a fatal error; the error is not listed. */
    public void fatalError(SAXParseException spe) throws SAXException {
        fatal = true;
    }

    /** Warnings are ignored. */
    public void warning(SAXParseException spe) throws SAXException {
    }

    /**
     * Returns the recorded errors.
     *
     * @return the live list of recorded errors
     */
    public LinkedList<String> getErrors() {
        return errors;
    }

    /**
     * Tells whether a fatal error has been reported.
     *
     * @return {@code true} if {@code fatalError} was called at least once
     */
    public boolean isFatal() {
        return fatal;
    }
}
@@ -0,0 +1,201 @@
/*
* Copyright (c) 2005, 2007 Henri Sivonen
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
package nu.validator.htmlparser.test;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.UnsupportedEncodingException;
import java.io.Writer;
import javax.xml.transform.ErrorListener;
import javax.xml.transform.SourceLocator;
import javax.xml.transform.TransformerException;
import org.xml.sax.ErrorHandler;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
/**
 * Error handler for both SAX parsing and JAXP transformations that prints
 * every warning, error and fatal error to {@code System.err} as UTF-8 text
 * and remembers whether an error or fatal error has been seen.
 *
 * <p>Each report consists of a heading line, the exception message, the
 * location (file, line, column) when one is available, and a blank line.
 *
 * @version $Id$
 * @author hsivonen
 */
public class SystemErrErrorHandler implements ErrorHandler, ErrorListener {

    /** Printed in place of the message when the exception carries none. */
    private static final String NO_MESSAGE = "(no message)";

    /** UTF-8 writer wrapping {@code System.err}. */
    private final Writer out;

    /** Whether an error or fatal error has been reported since the last reset. */
    private boolean inError = false;

    /**
     * Creates a handler writing to the {@code System.err} that is current at
     * construction time.
     */
    public SystemErrErrorHandler() {
        try {
            out = new OutputStreamWriter(System.err, "UTF-8");
        } catch (UnsupportedEncodingException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Prints the warning; does not affect {@link #isInError()}.
     *
     * @see org.xml.sax.ErrorHandler#warning(org.xml.sax.SAXParseException)
     */
    @Override
    public void warning(SAXParseException e) throws SAXException {
        reportSax("Warning:\n", e);
    }

    /**
     * Prints the error and marks the handler as being in error.
     *
     * @see org.xml.sax.ErrorHandler#error(org.xml.sax.SAXParseException)
     */
    @Override
    public void error(SAXParseException e) throws SAXException {
        inError = true;
        reportSax("Error:\n", e);
    }

    /**
     * Prints the fatal error and marks the handler as being in error.
     *
     * @see org.xml.sax.ErrorHandler#fatalError(org.xml.sax.SAXParseException)
     */
    @Override
    public void fatalError(SAXParseException e) throws SAXException {
        inError = true;
        reportSax("Fatal Error:\n", e);
    }

    /**
     * Tells whether an error or fatal error has been reported since
     * construction or the last {@link #reset()}.
     *
     * @return the inError flag
     */
    public boolean isInError() {
        return inError;
    }

    /** Clears the in-error flag. */
    public void reset() {
        inError = false;
    }

    /** Prints the transformation error and marks the handler as being in error. */
    @Override
    public void error(TransformerException e) throws TransformerException {
        inError = true;
        reportTransformer("Error:\n", e);
    }

    /** Prints the fatal transformation error and marks the handler as being in error. */
    @Override
    public void fatalError(TransformerException e)
            throws TransformerException {
        inError = true;
        reportTransformer("Fatal Error:\n", e);
    }

    /** Prints the transformation warning; does not affect {@link #isInError()}. */
    @Override
    public void warning(TransformerException e)
            throws TransformerException {
        reportTransformer("Warning:\n", e);
    }

    /** Prints a SAX exception; its location is always printed. */
    private void reportSax(String heading, SAXParseException e) throws SAXException {
        try {
            writeReport(heading, e.getMessage(), true, e.getSystemId(),
                    e.getLineNumber(), e.getColumnNumber());
        } catch (IOException ioe) {
            throw new SAXException(ioe);
        }
    }

    /** Prints a transformer exception; its location is printed only if it has a locator. */
    private void reportTransformer(String heading, TransformerException e)
            throws TransformerException {
        SourceLocator locator = e.getLocator();
        try {
            if (locator == null) {
                writeReport(heading, e.getMessage(), false, null, 0, 0);
            } else {
                writeReport(heading, e.getMessage(), true, locator.getSystemId(),
                        locator.getLineNumber(), locator.getColumnNumber());
            }
        } catch (IOException ioe) {
            throw new TransformerException(ioe);
        }
    }

    /** Writes one complete report and flushes it so it appears immediately. */
    private void writeReport(String heading, String message, boolean hasLocation,
            String systemId, int line, int column) throws IOException {
        out.write(heading);
        // Writer.write(String) throws NullPointerException on null, and
        // exceptions are allowed to have no message.
        out.write((message == null) ? NO_MESSAGE : message);
        if (hasLocation) {
            out.write("\nFile: ");
            out.write((systemId == null) ? "Unknown" : systemId);
            out.write("\nLine: ");
            out.write(Integer.toString(line));
            out.write(" Col: ");
            out.write(Integer.toString(column));
        }
        out.write("\n\n");
        out.flush();
    }
}
@@ -0,0 +1,210 @@
/*
* Copyright (c) 2007 Henri Sivonen
* Copyright (c) 2008 Mozilla Foundation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
package nu.validator.htmlparser.test;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.Writer;
import nu.validator.htmlparser.common.TokenHandler;
import nu.validator.htmlparser.impl.ElementName;
import nu.validator.htmlparser.impl.ErrorReportingTokenizer;
import nu.validator.htmlparser.impl.HtmlAttributes;
import nu.validator.htmlparser.impl.Tokenizer;
import nu.validator.htmlparser.io.Driver;
import org.xml.sax.ErrorHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
/**
 * Token handler and error handler that prints one line per event to a
 * {@link Writer}. Each line starts with a marker: {@code -} character data,
 * {@code !} comment, {@code D} doctype, {@code (} start tag, {@code A}
 * attribute, {@code )} end tag, {@code E} end of file, {@code 0} zero
 * replaced by a replacement character, and {@code R}/{@code F}/{@code W} for
 * errors, fatal errors and warnings.
 */
public class TokenPrinter implements TokenHandler, ErrorHandler {

    /** Destination of all output; closed in {@link #endTokenization()}. */
    private final Writer writer;

    /**
     * Prints character data. Every line feed in the data is written as the
     * two characters {@code \n}, and data following it continues on a new
     * output line that again starts with {@code -}.
     */
    public void characters(char[] buf, int start, int length)
            throws SAXException {
        try {
            // Set after a line feed: the next character must open a new "-" line.
            boolean needsNewLine = false;
            writer.write('-');
            final int end = start + length;
            for (int pos = start; pos < end; pos++) {
                if (needsNewLine) {
                    writer.write("\n-");
                    needsNewLine = false;
                }
                final char ch = buf[pos];
                if (ch != '\n') {
                    writer.write(ch);
                } else {
                    writer.write("\\n");
                    needsNewLine = true;
                }
            }
            writer.write('\n');
        } catch (IOException ioe) {
            throw new SAXException(ioe);
        }
    }

    /** Prints {@code !} followed by the comment text. */
    public void comment(char[] buf, int start, int length) throws SAXException {
        try {
            writer.write('!');
            writer.write(buf, start, length);
            writer.write('\n');
        } catch (IOException ioe) {
            throw new SAXException(ioe);
        }
    }

    /**
     * Prints {@code D}, the doctype name, a space and the force-quirks flag.
     * The public and system identifiers are not printed.
     */
    public void doctype(String name, String publicIdentifier, String systemIdentifier, boolean forceQuirks) throws SAXException {
        try {
            writer.write('D');
            writer.write(name);
            writer.write(' ');
            writer.write(String.valueOf(forceQuirks));
            writer.write('\n');
        } catch (IOException ioe) {
            throw new SAXException(ioe);
        }
    }

    /** Prints {@code )} followed by the element name. */
    public void endTag(ElementName eltName) throws SAXException {
        try {
            writer.write(')');
            writer.write(eltName.name);
            writer.write('\n');
        } catch (IOException ioe) {
            throw new SAXException(ioe);
        }
    }

    /** Prints the end-of-file marker line {@code E}. */
    public void eof() throws SAXException {
        try {
            writer.write("E\n");
        } catch (IOException ioe) {
            throw new SAXException(ioe);
        }
    }

    /** Nothing to set up at the start of tokenization. */
    public void startTokenization(Tokenizer self) throws SAXException {
    }

    /**
     * Prints {@code (} followed by the element name, then one {@code A} line
     * per attribute holding its qualified name, a space and its value. The
     * self-closing flag is not printed.
     */
    public void startTag(ElementName eltName, HtmlAttributes attributes, boolean selfClosing)
            throws SAXException {
        try {
            writer.write('(');
            writer.write(eltName.name);
            writer.write('\n');
            int index = 0;
            while (index < attributes.getLength()) {
                writer.write('A');
                writer.write(attributes.getQNameNoBoundsCheck(index));
                writer.write(' ');
                writer.write(attributes.getValueNoBoundsCheck(index));
                writer.write('\n');
                index++;
            }
        } catch (IOException ioe) {
            throw new SAXException(ioe);
        }
    }

    /** Comments are always requested so they can be printed. */
    public boolean wantsComments() throws SAXException {
        return true;
    }

    /**
     * Tokenizes the file named by the first argument and prints its tokens
     * and errors to {@code System.out} as UTF-8.
     *
     * @param args {@code args[0]} is the path of the file to tokenize
     * @throws SAXException if tokenization fails
     * @throws IOException if the file cannot be read
     */
    public static void main(String[] args) throws SAXException, IOException {
        Writer stdout = new OutputStreamWriter(System.out, "UTF-8");
        TokenPrinter printer = new TokenPrinter(stdout);
        Driver driver = new Driver(new ErrorReportingTokenizer(printer));
        driver.setErrorHandler(printer);
        File input = new File(args[0]);
        InputSource source = new InputSource(new FileInputStream(input));
        source.setSystemId(input.toURI().toASCIIString());
        driver.tokenize(source);
    }

    /**
     * Creates a printer writing to the given writer.
     *
     * @param writer the destination of the printed tokens
     */
    public TokenPrinter(final Writer writer) {
        this.writer = writer;
    }

    /** Writes the prefix, the exception message and a line feed, in that order. */
    private void report(String prefix, SAXParseException exception) throws SAXException {
        try {
            writer.write(prefix);
            writer.write(exception.getMessage());
            writer.write("\n");
        } catch (IOException ioe) {
            throw new SAXException(ioe);
        }
    }

    /** Prints {@code R} and the error message. */
    public void error(SAXParseException exception) throws SAXException {
        report("R ", exception);
    }

    /** Prints {@code F} and the fatal error message. */
    public void fatalError(SAXParseException exception) throws SAXException {
        report("F ", exception);
    }

    /** Prints {@code W} and the warning message. */
    public void warning(SAXParseException exception) throws SAXException {
        report("W ", exception);
    }

    /** Flushes and closes the writer. */
    public void endTokenization() throws SAXException {
        try {
            writer.flush();
            writer.close();
        } catch (IOException ioe) {
            throw new SAXException(ioe);
        }
    }

    /** Prints the marker line {@code 0}. */
    @Override public void zeroOriginatingReplacementCharacter()
            throws SAXException {
        try {
            writer.write("0\n");
        } catch (IOException ioe) {
            throw new SAXException(ioe);
        }
    }

    /** CDATA sections are never allowed by this handler. */
    @Override public boolean cdataSectionAllowed() throws SAXException {
        return false;
    }

    /** No buffer needs to be pre-sized; does nothing. */
    @Override public void ensureBufferSpace(int inputLength)
            throws SAXException {
    }
}
@@ -0,0 +1,211 @@
/*
* Copyright (c) 2007 Henri Sivonen
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
package nu.validator.htmlparser.test;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.io.StringReader;
import java.io.UnsupportedEncodingException;
import java.io.Writer;
import nu.validator.htmlparser.common.XmlViolationPolicy;
import nu.validator.htmlparser.impl.ErrorReportingTokenizer;
import nu.validator.htmlparser.impl.Tokenizer;
import nu.validator.htmlparser.io.Driver;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import antlr.RecognitionException;
import antlr.TokenStreamException;
import com.sdicons.json.model.JSONArray;
import com.sdicons.json.model.JSONObject;
import com.sdicons.json.model.JSONString;
import com.sdicons.json.model.JSONValue;
import com.sdicons.json.parser.JSONParser;
/**
 * Command-line driver that feeds JSON-described tokenizer test cases through
 * the tokenizer and writes a "Success"/"Failure" report to standard output.
 *
 * Each input file is expected to hold a JSON object with either a "tests"
 * array or an "xmlViolationTests" array.
 */
public class TokenizerTester {

    // Labels that may appear in a test's "initialStates" array.
    private static final JSONString PLAINTEXT = new JSONString("PLAINTEXT state");

    private static final JSONString PCDATA = new JSONString("DATA state");

    private static final JSONString RCDATA = new JSONString("RCDATA state");

    private static final JSONString RAWTEXT = new JSONString("RAWTEXT state");

    /**
     * Compares two JSON values: simple values via equals(), arrays and
     * objects via equals() on their underlying value containers.
     *
     * @param one the first value; its kind decides how the comparison is made
     * @param other the value to compare against
     * @return true if the two values are considered equal
     */
    private static boolean jsonDeepEquals(JSONValue one, JSONValue other) {
        if (one.isSimple()) {
            return one.equals(other);
        } else if (one.isArray()) {
            if (other.isArray()) {
                JSONArray oneArr = (JSONArray) one;
                JSONArray otherArr = (JSONArray) other;
                return oneArr.getValue().equals(otherArr.getValue());
            } else {
                return false;
            }
        } else if (one.isObject()) {
            if (other.isObject()) {
                JSONObject oneObject = (JSONObject) one;
                JSONObject otherObject = (JSONObject) other;
                return oneObject.getValue().equals(otherObject.getValue());
            } else {
                return false;
            }
        } else {
            throw new RuntimeException("Should never happen.");
        }
    }

    private JSONArray tests;

    private final JSONArrayTokenHandler tokenHandler;

    private final Driver driver;

    private final Writer writer;

    /**
     * Sets up the tokenizer driver and parses the test file.
     *
     * The stream is read as UTF-8 JSON. This constructor does not close the
     * stream; the caller owns it.
     *
     * @param stream the JSON test file contents
     */
    private TokenizerTester(InputStream stream) throws TokenStreamException,
            RecognitionException, UnsupportedEncodingException {
        tokenHandler = new JSONArrayTokenHandler();
        driver = new Driver(new ErrorReportingTokenizer(tokenHandler));
        driver.setCommentPolicy(XmlViolationPolicy.ALLOW);
        driver.setContentNonXmlCharPolicy(XmlViolationPolicy.ALLOW);
        driver.setContentSpacePolicy(XmlViolationPolicy.ALLOW);
        driver.setNamePolicy(XmlViolationPolicy.ALLOW);
        driver.setXmlnsPolicy(XmlViolationPolicy.ALLOW);
        driver.setErrorHandler(tokenHandler);
        writer = new OutputStreamWriter(System.out, "UTF-8");
        JSONParser jsonParser = new JSONParser(new InputStreamReader(stream,
                "UTF-8"));
        JSONObject obj = (JSONObject) jsonParser.nextValue();
        tests = (JSONArray) obj.get("tests");
        if (tests == null) {
            // No plain "tests" array: fall back to the XML-violation tests and
            // switch the corresponding policies to ALTER_INFOSET.
            tests = (JSONArray) obj.get("xmlViolationTests");
            driver.setCommentPolicy(XmlViolationPolicy.ALTER_INFOSET);
            driver.setContentNonXmlCharPolicy(XmlViolationPolicy.ALTER_INFOSET);
            driver.setNamePolicy(XmlViolationPolicy.ALTER_INFOSET);
            driver.setXmlnsPolicy(XmlViolationPolicy.ALTER_INFOSET);
        }
    }

    /**
     * Runs every test of the loaded file and flushes the report.
     */
    private void runTests() throws SAXException, IOException {
        for (JSONValue val : tests.getValue()) {
            runTest((JSONObject) val);
        }
        writer.flush();
    }

    /**
     * Runs a single test object once per listed initial state.
     *
     * When the test has no "initialStates" entry it is run once from the data
     * state and without a last start tag.
     *
     * @param test the JSON object describing the test
     */
    private void runTest(JSONObject test) throws SAXException, IOException {
        String inputString = ((JSONString) test.get("input")).getValue();
        JSONArray expectedTokens = (JSONArray) test.get("output");
        String description = ((JSONString) test.get("description")).getValue();
        JSONString lastStartTagJSON = ((JSONString) test.get("lastStartTag"));
        String lastStartTag = lastStartTagJSON == null ? null
                : lastStartTagJSON.getValue();
        JSONArray contentModelFlags = (JSONArray) test.get("initialStates");
        if (contentModelFlags == null) {
            runTestInner(inputString, expectedTokens, description,
                    Tokenizer.DATA, null);
        } else {
            for (JSONValue value : contentModelFlags.getValue()) {
                if (PCDATA.equals(value)) {
                    runTestInner(inputString, expectedTokens, description,
                            Tokenizer.DATA, lastStartTag);
                } else if (RAWTEXT.equals(value)) {
                    runTestInner(inputString, expectedTokens, description,
                            Tokenizer.RAWTEXT, lastStartTag);
                } else if (RCDATA.equals(value)) {
                    runTestInner(inputString, expectedTokens, description,
                            Tokenizer.RCDATA, lastStartTag);
                } else if (PLAINTEXT.equals(value)) {
                    runTestInner(inputString, expectedTokens, description,
                            Tokenizer.PLAINTEXT, lastStartTag);
                } else {
                    throw new RuntimeException("Broken test data.");
                }
            }
        }
    }

    /**
     * Tokenizes one input from one initial state and reports the outcome.
     *
     * @param inputString the text to tokenize
     * @param expectedTokens the token array the tokenizer should produce
     * @param description human-readable test name, printed on failure
     * @param contentModelFlag the initial tokenizer state constant
     * @param contentModelElement the last start tag name, or null
     * @throws SAXException declared for callers; failures raised while
     *         tokenizing are caught and reported instead
     * @throws IOException if writing the report fails
     */
    private void runTestInner(String inputString, JSONArray expectedTokens,
            String description, int contentModelFlag,
            String contentModelElement) throws SAXException, IOException {
        tokenHandler.setContentModelFlag(contentModelFlag, contentModelElement);
        InputSource is = new InputSource(new StringReader(inputString));
        try {
            driver.tokenize(is);
            JSONArray actualTokens = tokenHandler.getArray();
            if (jsonDeepEquals(actualTokens, expectedTokens)) {
                writer.write("Success\n");
            } else {
                writer.write("Failure\n");
                writer.write(description);
                writer.write("\nInput:\n");
                writer.write(inputString);
                writer.write("\nExpected tokens:\n");
                writer.write(expectedTokens.render(false));
                writer.write("\nActual tokens:\n");
                writer.write(actualTokens.render(false));
                writer.write("\n");
            }
        } catch (Throwable t) {
            // Any failure while tokenizing or comparing is reported as a test
            // failure so that the remaining tests still run.
            writer.write("Failure\n");
            writer.write(description);
            writer.write("\nInput:\n");
            writer.write(inputString);
            writer.write("\n");
            t.printStackTrace(new PrintWriter(writer, false));
        }
    }

    /**
     * Runs every test file named on the command line.
     *
     * @param args paths of JSON test files
     * @throws RecognitionException if a test file is not valid JSON
     * @throws TokenStreamException if a test file is not valid JSON
     * @throws IOException if a file cannot be read or the report cannot be
     *         written
     * @throws SAXException declared by the test runner
     */
    public static void main(String[] args) throws TokenStreamException,
            RecognitionException, SAXException, IOException {
        for (int i = 0; i < args.length; i++) {
            InputStream stream = new FileInputStream(args[i]);
            try {
                TokenizerTester tester = new TokenizerTester(stream);
                tester.runTests();
            } finally {
                // Release the file handle even when a test file is broken.
                stream.close();
            }
        }
    }
}
@@ -0,0 +1,239 @@
/*
* Copyright (c) 2007 Henri Sivonen
* Copyright (c) 2008 Mozilla Foundation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
package nu.validator.htmlparser.test;
import java.io.IOException;
import java.io.Writer;
import java.util.Map;
import java.util.TreeMap;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;
import org.xml.sax.ext.LexicalHandler;
/**
 * SAX handler that writes the events it receives as a line-oriented tree
 * dump: every node line starts with "| " followed by one indent unit per
 * nesting level.
 */
public class TreeDumpContentHandler implements ContentHandler, LexicalHandler {

    private final Writer writer;

    // Current element nesting depth; controls the indent written by printLead().
    private int level = 0;

    // True while a quoted text line is open and its closing quote is pending.
    private boolean inCharacters = false;

    // Whether endDocument() flushes and closes the writer.
    private final boolean close;

    /**
     * @param writer destination of the dump
     * @param close whether the writer is flushed and closed at endDocument()
     */
    public TreeDumpContentHandler(final Writer writer, boolean close) {
        this.writer = writer;
        this.close = close;
    }

    /**
     * Creates a handler that closes the writer at endDocument().
     *
     * @param writer destination of the dump
     */
    public TreeDumpContentHandler(final Writer writer) {
        this(writer, true);
    }

    /**
     * Terminates a pending text line, if any, and writes the line prefix for
     * the current nesting level.
     */
    private void printLead() throws IOException {
        if (inCharacters) {
            writer.write("\"\n");
            inCharacters = false;
        }
        writer.write("| ");
        for (int i = 0; i < level; i++) {
            writer.write(" ");
        }
    }

    /**
     * Appends text to the current quoted text line, opening one if needed, so
     * that consecutive calls end up on a single line.
     */
    public void characters(char[] ch, int start, int length)
            throws SAXException {
        try {
            if (!inCharacters) {
                printLead();
                writer.write('"');
                inCharacters = true;
            }
            writer.write(ch, start, length);
        } catch (IOException e) {
            throw new SAXException(e);
        }
    }

    /**
     * Closes a pending text line and steps back out of the element.
     */
    public void endElement(String uri, String localName, String qName)
            throws SAXException {
        try {
            if (inCharacters) {
                writer.write("\"\n");
                inCharacters = false;
            }
            level--;
            if ("http://www.w3.org/1999/xhtml".equals(uri)
                    && "template".equals(localName)) {
                // decrement level for the "content"
                level--;
            }
        } catch (IOException e) {
            throw new SAXException(e);
        }
    }

    /**
     * Writes the element line, then one line per attribute sorted by the
     * displayed attribute name, and the extra "content" line for XHTML
     * template elements.
     */
    public void startElement(String uri, String localName, String qName,
            Attributes atts) throws SAXException {
        try {
            printLead();
            writer.write('<');
            // Names are compared by value so that the handler does not depend
            // on the event source handing over interned strings.
            if ("http://www.w3.org/1998/Math/MathML".equals(uri)) {
                writer.write("math ");
            } else if ("http://www.w3.org/2000/svg".equals(uri)) {
                writer.write("svg ");
            } else if (!"http://www.w3.org/1999/xhtml".equals(uri)) {
                writer.write("otherns ");
            }
            writer.write(localName);
            writer.write(">\n");
            level++;
            TreeMap<String, String> map = new TreeMap<String, String>();
            for (int i = 0; i < atts.getLength(); i++) {
                String ns = atts.getURI(i);
                String name;
                if ("http://www.w3.org/1999/xlink".equals(ns)) {
                    name = "xlink " + atts.getLocalName(i);
                } else if ("http://www.w3.org/XML/1998/namespace".equals(ns)) {
                    name = "xml " + atts.getLocalName(i);
                } else if ("http://www.w3.org/2000/xmlns/".equals(ns)) {
                    name = "xmlns " + atts.getLocalName(i);
                } else if ("".equals(ns)) {
                    // Attribute in no namespace: the decision is made on the
                    // attribute's own namespace, not on the element's.
                    name = atts.getLocalName(i);
                } else {
                    name = "otherns " + atts.getLocalName(i);
                }
                map.put(name, atts.getValue(i));
            }
            for (Map.Entry<String, String> entry : map.entrySet()) {
                printLead();
                writer.write(entry.getKey());
                writer.write("=\"");
                writer.write(entry.getValue());
                writer.write("\"\n");
            }
            if ("http://www.w3.org/1999/xhtml".equals(uri)
                    && "template".equals(localName)) {
                // Children of a template are dumped one level deeper, below
                // a synthetic "content" line.
                printLead();
                level++;
                writer.write("content\n");
            }
        } catch (IOException e) {
            throw new SAXException(e);
        }
    }

    /**
     * Writes a comment line.
     */
    public void comment(char[] ch, int offset, int len) throws SAXException {
        try {
            printLead();
            writer.write("<!-- ");
            writer.write(ch, offset, len);
            writer.write(" -->\n");
        } catch (IOException e) {
            throw new SAXException(e);
        }
    }

    /**
     * Writes the doctype line. Both identifiers are printed, each in quotes,
     * when at least one of them is non-empty; a null identifier is treated as
     * the empty string.
     */
    public void startDTD(String name, String publicIdentifier,
            String systemIdentifier) throws SAXException {
        try {
            String publicId = publicIdentifier == null ? ""
                    : publicIdentifier;
            String systemId = systemIdentifier == null ? ""
                    : systemIdentifier;
            printLead();
            writer.write("<!DOCTYPE ");
            writer.write(name);
            if (publicId.length() > 0 || systemId.length() > 0) {
                writer.write(' ');
                writer.write('\"');
                writer.write(publicId);
                writer.write('\"');
                writer.write(' ');
                writer.write('\"');
                writer.write(systemId);
                writer.write('\"');
            }
            writer.write(">\n");
        } catch (IOException e) {
            throw new SAXException(e);
        }
    }

    /**
     * Closes a pending text line and, if configured to, flushes and closes
     * the writer.
     */
    public void endDocument() throws SAXException {
        try {
            if (inCharacters) {
                writer.write("\"\n");
                inCharacters = false;
            }
            if (close) {
                writer.flush();
                writer.close();
            }
        } catch (IOException e) {
            throw new SAXException(e);
        }
    }

    // The remaining callbacks contribute nothing to the dump.

    public void startPrefixMapping(String prefix, String uri)
            throws SAXException {
    }

    public void startEntity(String arg0) throws SAXException {
    }

    public void endCDATA() throws SAXException {
    }

    public void endDTD() throws SAXException {
    }

    public void endEntity(String arg0) throws SAXException {
    }

    public void startCDATA() throws SAXException {
    }

    public void endPrefixMapping(String prefix) throws SAXException {
    }

    public void ignorableWhitespace(char[] ch, int start, int length)
            throws SAXException {
    }

    public void processingInstruction(String target, String data)
            throws SAXException {
    }

    public void setDocumentLocator(Locator locator) {
    }

    public void skippedEntity(String name) throws SAXException {
    }

    public void startDocument() throws SAXException {
    }
}
@@ -0,0 +1,50 @@
/*
* Copyright (c) 2007 Henri Sivonen
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
package nu.validator.htmlparser.test;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import nu.validator.htmlparser.common.XmlViolationPolicy;
import nu.validator.htmlparser.sax.HtmlParser;
/**
 * Command-line tool that parses one HTML file and prints its tree dump to
 * standard output as UTF-8.
 */
public class TreePrinter {

    /**
     * @param args a single element: the path of the HTML file to dump
     * @throws SAXException if parsing fails
     * @throws IOException if the file cannot be read or the dump cannot be
     *         written
     */
    public static void main(String[] args) throws SAXException, IOException {
        if (args.length < 1) {
            System.err.println("Expected the path of the file to parse as the first argument.");
            System.exit(1);
            return;
        }
        TreeDumpContentHandler treeDumpContentHandler = new TreeDumpContentHandler(
                new OutputStreamWriter(System.out, "UTF-8"));
        HtmlParser htmlParser = new HtmlParser();
        htmlParser.setContentHandler(treeDumpContentHandler);
        htmlParser.setLexicalHandler(treeDumpContentHandler);
        htmlParser.setErrorHandler(new SystemErrErrorHandler());
        htmlParser.setXmlPolicy(XmlViolationPolicy.ALLOW);
        File file = new File(args[0]);
        FileInputStream in = new FileInputStream(file);
        try {
            InputSource is = new InputSource(in);
            is.setSystemId(file.toURI().toASCIIString());
            htmlParser.parse(is);
        } finally {
            // Release the file handle whether or not parsing succeeded.
            in.close();
        }
    }
}
@@ -0,0 +1,246 @@
/*
* Copyright (c) 2007 Henri Sivonen
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
package nu.validator.htmlparser.test;
import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.StringWriter;
import java.util.LinkedList;
import nu.validator.htmlparser.common.XmlViolationPolicy;
import nu.validator.htmlparser.sax.HtmlParser;
import org.xml.sax.InputSource;
import org.xml.sax.SAXParseException;
/**
 * Command-line driver for tree-construction test files made of
 * "#"-introduced sections (#data, #errors, optional #document-fragment and
 * #script-on/#script-off, #document). Each test's data is parsed, dumped
 * with TreeDumpContentHandler and compared with the expected dump; results
 * go to standard error.
 */
public class TreeTester {

    private final BufferedInputStream aggregateStream;

    // Never set to true within this class. When true, the parser gets a FATAL
    // streamability policy and a fatal error counts as success.
    private boolean streaming = false;

    /**
     * @param aggregateStream the contents of one test file
     */
    public TreeTester(InputStream aggregateStream) {
        this.aggregateStream = new BufferedInputStream(aggregateStream);
    }

    /**
     * Runs all tests of the file. The file has to start with '#'.
     */
    private void runTests() throws Throwable {
        if (aggregateStream.read() != '#') {
            System.err.println("No hash at start!");
            return;
        }
        while (runTest()) {
            // spin
        }
    }

    /**
     * Reads bytes up to, but not including, the next line feed and returns
     * them as a string, one char per byte. Stops at end of stream as well, so
     * that truncated test data cannot make the caller loop forever.
     */
    private String readLabelLine() throws IOException {
        StringBuilder sb = new StringBuilder();
        int c;
        while ((c = aggregateStream.read()) != '\n' && c != -1) {
            sb.append((char) c);
        }
        return sb.toString();
    }

    /**
     * Runs the next test of the file.
     *
     * The test is read twice: a first pass looks ahead for the fragment
     * context and scripting directive, then the stream is reset to the mark
     * and the test is actually executed.
     *
     * @return false when the test data is exhausted or truncated, true when a
     *         test was run
     */
    private boolean runTest() throws Throwable {
        UntilHashInputStream stream = null;
        try {
            String context = null;
            boolean scriptingEnabled = true;
            boolean hadScriptingDirective = false;
            // Remember the start of the test so the look-ahead can be undone.
            aggregateStream.mark(12288);
            if (skipLabel()) { // #data
                return false;
            }
            stream = new UntilHashInputStream(aggregateStream);
            while (stream.read() != -1) {
                // spin
            }
            if (skipLabel()) { // #errors
                System.err.println("Premature end of test data.");
                return false;
            }
            stream = new UntilHashInputStream(aggregateStream);
            while (stream.read() != -1) {
                // spin
            }
            // The '#' of this label was already consumed by the section
            // stream above, so the label arrives without it.
            String label = readLabelLine();
            if ("document-fragment".equals(label)) {
                context = readLabelLine();
                // Now potentially gather #script-on/off
                label = readLabelLine();
                // This line is read raw and still carries its leading '#';
                // drop it so the comparisons below can match.
                if (label.startsWith("#")) {
                    label = label.substring(1);
                }
            }
            if ("script-on".equals(label)) {
                hadScriptingDirective = true;
            } else if ("script-off".equals(label)) {
                hadScriptingDirective = true;
                scriptingEnabled = false;
            }
            aggregateStream.reset();
            if (skipLabel()) { // #data
                System.err.println("Premature end of test data.");
                return false;
            }
            stream = new UntilHashInputStream(aggregateStream);
            InputSource is = new InputSource(stream);
            is.setEncoding("UTF-8");
            StringWriter sw = new StringWriter();
            ListErrorHandler leh = new ListErrorHandler();
            TreeDumpContentHandler treeDumpContentHandler = new TreeDumpContentHandler(
                    sw);
            HtmlParser htmlParser = new HtmlParser(XmlViolationPolicy.ALLOW);
            if (streaming) {
                htmlParser.setStreamabilityViolationPolicy(XmlViolationPolicy.FATAL);
            }
            htmlParser.setContentHandler(treeDumpContentHandler);
            htmlParser.setLexicalHandler(treeDumpContentHandler);
            htmlParser.setErrorHandler(leh);
            htmlParser.setScriptingEnabled(scriptingEnabled);
            try {
                if (context == null) {
                    htmlParser.parse(is);
                } else {
                    // A context of the form "svg x" or "math x" selects the
                    // namespace of the context element.
                    String ns = "http://www.w3.org/1999/xhtml";
                    if (context.startsWith("svg ")) {
                        ns = "http://www.w3.org/2000/svg";
                        context = context.substring(4);
                    } else if (context.startsWith("math ")) {
                        ns = "http://www.w3.org/1998/Math/MathML";
                        context = context.substring(5);
                    }
                    htmlParser.parseFragment(is, context, ns);
                    // endDocument() is invoked by hand on this path so that a
                    // pending text line in the dump gets terminated.
                    treeDumpContentHandler.endDocument();
                }
            } catch (SAXParseException e) {
                // ignore
            }
            stream.close();
            if (skipLabel()) { // #errors
                System.err.println("Premature end of test data.");
                return false;
            }
            LinkedList<String> expectedErrors = new LinkedList<String>();
            BufferedReader br = new BufferedReader(new InputStreamReader(
                    new UntilHashInputStream(aggregateStream), "UTF-8"));
            String line = null;
            while ((line = br.readLine()) != null) {
                expectedErrors.add(line);
            }
            if (context != null) {
                if (skipLabel()) { // #document-fragment
                    System.err.println("Premature end of test data.");
                    return false;
                }
                UntilHashInputStream stream2 = new UntilHashInputStream(aggregateStream);
                while (stream2.read() != -1) {
                    // spin
                }
            }
            if (hadScriptingDirective && skipLabel()) { // #script-on/off
                System.err.println("Premature end of test data.");
                return false;
            }
            if (skipLabel()) { // #document
                System.err.println("Premature end of test data.");
                return false;
            }
            StringBuilder expectedBuilder = new StringBuilder();
            br = new BufferedReader(new InputStreamReader(
                    new UntilHashInputStream(aggregateStream), "UTF-8"));
            int ch;
            while ((ch = br.read()) != -1) {
                expectedBuilder.append((char)ch);
            }
            String expected = expectedBuilder.toString();
            String actual = sw.toString();
            LinkedList<String> actualErrors = leh.getErrors();
            // Only the tree dump is compared; the error lists are printed on
            // failure but do not affect the verdict.
            if (expected.equals(actual) || (streaming && leh.isFatal())) {
                System.err.println("Success.");
            } else {
                System.err.print("Failure.\nData:\n" + stream + "\nExpected:\n"
                        + expected + "Got: \n" + actual);
                System.err.println("Expected errors:");
                for (String err : expectedErrors) {
                    System.err.println(err);
                }
                System.err.println("Actual errors:");
                for (String err : actualErrors) {
                    System.err.println(err);
                }
            }
        } catch (Throwable t) {
            System.err.println("Failure.\nData:\n" + stream);
            throw t;
        }
        return true;
    }

    /**
     * Skips one label line: consumes one byte and then everything up to and
     * including the next line feed.
     *
     * @return true if the end of the stream was reached, false otherwise
     */
    private boolean skipLabel() throws IOException {
        int b = aggregateStream.read();
        if (b == -1) {
            return true;
        }
        for (;;) {
            b = aggregateStream.read();
            if (b == -1) {
                return true;
            } else if (b == 0x0A) {
                return false;
            }
        }
    }

    /**
     * Runs every test file named on the command line.
     *
     * @param args paths of test files
     * @throws Throwable whatever a test run throws
     */
    public static void main(String[] args) throws Throwable {
        for (int i = 0; i < args.length; i++) {
            InputStream in = new FileInputStream(args[i]);
            try {
                TreeTester tester = new TreeTester(in);
                tester.runTests();
            } finally {
                // Release the file handle even when a test throws.
                in.close();
            }
        }
    }
}
@@ -0,0 +1,97 @@
/*
* Copyright (c) 2007 Henri Sivonen
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
package nu.validator.htmlparser.test;
import java.io.IOException;
import java.io.InputStream;
/**
 * Input stream view over a delegate that ends at the first '#' that either
 * is the very first byte or directly follows a line feed. That terminating
 * line feed is not returned, and the '#' is consumed from the delegate.
 *
 * The bytes handed out are additionally recorded in a printable transcript
 * available through toString().
 */
public class UntilHashInputStream extends InputStream {

    // Printable transcript of the bytes returned so far.
    private final StringBuilder builder = new StringBuilder();

    private final InputStream delegate;

    // One byte of look-ahead: the next byte to return, or -1 at end of input.
    private int buffer = -1;

    // True once the section end or the end of the delegate has been reached.
    private boolean closed = false;

    /**
     * Reads the first byte of look-ahead from the delegate.
     *
     * @param delegate the stream to read from
     * @throws IOException if reading the first byte fails
     */
    public UntilHashInputStream(final InputStream delegate) throws IOException {
        this.delegate = delegate;
        this.buffer = delegate.read();
        if (buffer == '#') {
            // Empty section: the terminator is the very first byte.
            closed = true;
        }
    }

    /**
     * Returns the next byte of the section, or -1 at the end of the section
     * or of the delegate.
     */
    @Override
    public int read() throws IOException {
        if (closed) {
            return -1;
        }
        int rv = buffer;
        if (rv == -1) {
            // The delegate is exhausted. Report end of stream without
            // recording the sentinel in the transcript.
            closed = true;
            return -1;
        }
        buffer = delegate.read();
        if (buffer == '#' && rv == '\n') {
            // end of stream
            closed = true;
            return -1;
        }
        if (rv >= 0x20 && rv < 0x80) {
            builder.append(((char)rv));
        } else {
            // Everything outside 0x20..0x7F is recorded as a hex escape.
            builder.append("0x");
            builder.append(Integer.toHexString(rv));
        }
        return rv;
    }

    /**
     * Skips the rest of the section so that the delegate is positioned just
     * after the terminating '#'. The delegate itself is not closed.
     *
     * @see java.io.InputStream#close()
     */
    @Override
    public void close() throws IOException {
        super.close();
        if (closed) {
            return;
        }
        // Use the same terminator as read(): a '#' directly after a line
        // feed. The look-ahead byte is the byte preceding the next one read.
        int previous = buffer;
        for (;;) {
            int b = delegate.read();
            if (b == -1 || (b == 0x23 && previous == '\n')) {
                break;
            }
            previous = b;
        }
        closed = true;
    }

    /**
     * Returns the transcript of the bytes returned by read() so far.
     *
     * @see java.lang.Object#toString()
     */
    @Override
    public String toString() {
        return builder.toString();
    }
}
@@ -0,0 +1,63 @@
/*
* Copyright (c) 2008 Mozilla Foundation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
package nu.validator.htmlparser.test;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.AttributesImpl;
import nu.validator.htmlparser.sax.XmlSerializer;
/**
 * Manual smoke test that pushes a fixed sequence of SAX events through an
 * XmlSerializer writing to standard output.
 */
public class XmlSerializerTester {

    // Namespace used for the d, e and f elements and for the "about" attribute.
    private static final String RDF_NS = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";

    /**
     * Emits the event sequence. The command-line arguments are not used.
     *
     * @param args ignored
     * @throws SAXException if the serializer rejects an event
     */
    public static void main(String[] args) throws SAXException {
        final AttributesImpl attributes = new AttributesImpl();
        final XmlSerializer out = new XmlSerializer(System.out);

        out.startDocument();

        // Elements in the ad-hoc namespaces "1" and "2", with no attributes yet.
        out.startElement("1", "a", null, attributes);
        out.startElement("1", "b", null, attributes);
        out.endElement("1", "b", null);
        out.startElement("2", "c", null, attributes);
        out.endElement("2", "c", null);

        // From here on the shared attribute list carries one namespaced attribute.
        attributes.addAttribute(RDF_NS, "about", null, "CDATA", "");
        out.startElement(RDF_NS, "d", null, attributes);
        out.endElement(RDF_NS, "d", null);

        // Prefix mappings declared by the caller: "rdf" and "p0".
        out.startPrefixMapping("rdf", "foo");
        out.startElement(RDF_NS, "e", null, attributes);
        out.startPrefixMapping("p0", "bar");
        out.startElement(RDF_NS, "f", null, attributes);

        // Text with one surrogate pair followed by an unpaired high and an
        // unpaired low surrogate; all eight chars are passed on.
        final char[] text = "a\uD834\uDD21a\uD834a\uDD21a".toCharArray();
        out.characters(text, 0, 8);

        out.endElement(RDF_NS, "f", null);
        out.endElement(RDF_NS, "e", null);
        out.endPrefixMapping("rdf");
        out.endElement("1", "a", null);
        out.endDocument();
    }
}
@@ -0,0 +1,33 @@
/*
* Copyright (c) 2009 Mozilla Foundation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
package nu.validator.htmlparser.test;
import nu.xom.Attribute;
import nu.xom.Element;
/**
 * Minimal XOM experiment: builds an XHTML html element and tries to attach
 * an attribute whose name is "xmlns:foo".
 */
public class XomTest {

    /**
     * @param args ignored
     */
    public static void main(String[] args) {
        final Element root = new Element("html", "http://www.w3.org/1999/xhtml");
        final Attribute namespaceLookalike = new Attribute("xmlns:foo", "bar");
        root.addAttribute(namespaceLookalike);
    }
}
@@ -0,0 +1,29 @@
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">
<html>
<head><title>Package Overview</title>
<!--
Copyright (c) 2007 Henri Sivonen
Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the "Software"),
to deal in the Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, sublicense,
and/or sell copies of the Software, and to permit persons to whom the
Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
-->
</head>
<body bgcolor="white">
<p>Test drivers.</p>
</body>
</html>
@@ -0,0 +1,87 @@
/*
* Copyright (c) 2008 Mozilla Foundation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
package nu.validator.htmlparser.tools;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.MalformedURLException;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.TransformerException;
import nu.validator.htmlparser.common.XmlViolationPolicy;
import nu.validator.htmlparser.sax.HtmlParser;
import nu.validator.htmlparser.sax.HtmlSerializer;
import nu.validator.htmlparser.sax.XmlSerializer;
import nu.validator.htmlparser.test.SystemErrErrorHandler;
import org.xml.sax.ContentHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
/**
 * Command-line tool that parses HTML and writes it back out through
 * HtmlSerializer.
 */
public class HTML2HTML {

    /**
     * Reads from standard input or a file and writes to standard output or a
     * file, depending on the number of arguments.
     *
     * @param args none to use stdin/stdout; one for an input file; two for
     *        an input file and an output file
     * @throws SAXException if parsing or serializing fails
     * @throws IOException if a file cannot be opened, read or written
     */
    public static void main(String[] args) throws SAXException,
            ParserConfigurationException, MalformedURLException, IOException,
            TransformerException {
        InputStream in;
        OutputStream out;

        switch (args.length) {
            case 0:
                in = System.in;
                out = System.out;
                break;
            case 1:
                in = new FileInputStream(args[0]);
                out = System.out;
                break;
            case 2:
                in = new FileInputStream(args[0]);
                try {
                    out = new FileOutputStream(args[1]);
                } catch (IOException e) {
                    // Do not leave the input file open when the output file
                    // cannot be created.
                    in.close();
                    throw e;
                }
                break;
            default:
                System.err.println("Too many arguments. No arguments to use stdin/stdout. One argument to reading from file and write to stdout. Two arguments to read from first file and write to second.");
                System.exit(1);
                return;
        }

        try {
            ContentHandler serializer = new HtmlSerializer(out);

            HtmlParser parser = new HtmlParser(XmlViolationPolicy.ALLOW);
            parser.setErrorHandler(new SystemErrErrorHandler());
            parser.setContentHandler(serializer);
            // The serializer also receives the lexical events.
            parser.setProperty("http://xml.org/sax/properties/lexical-handler",
                    serializer);
            parser.parse(new InputSource(in));

            out.flush();
        } finally {
            // Close both ends even when parsing fails.
            try {
                out.close();
            } finally {
                in.close();
            }
        }
    }
}
@@ -0,0 +1,86 @@
/*
* Copyright (c) 2008 Mozilla Foundation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
package nu.validator.htmlparser.tools;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.MalformedURLException;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.TransformerException;
import nu.validator.htmlparser.common.XmlViolationPolicy;
import nu.validator.htmlparser.sax.HtmlParser;
import nu.validator.htmlparser.sax.XmlSerializer;
import nu.validator.htmlparser.test.SystemErrErrorHandler;
import org.xml.sax.ContentHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
/**
 * Command-line tool that parses HTML and writes it out through
 * XmlSerializer, with the parser set to the ALTER_INFOSET policy.
 */
public class HTML2XML {

    /**
     * Reads from standard input or a file and writes to standard output or a
     * file, depending on the number of arguments.
     *
     * @param args none to use stdin/stdout; one for an input file; two for
     *        an input file and an output file
     * @throws SAXException if parsing or serializing fails
     * @throws IOException if a file cannot be opened, read or written
     */
    public static void main(String[] args) throws SAXException,
            ParserConfigurationException, MalformedURLException, IOException,
            TransformerException {
        InputStream in;
        OutputStream out;

        switch (args.length) {
            case 0:
                in = System.in;
                out = System.out;
                break;
            case 1:
                in = new FileInputStream(args[0]);
                out = System.out;
                break;
            case 2:
                in = new FileInputStream(args[0]);
                try {
                    out = new FileOutputStream(args[1]);
                } catch (IOException e) {
                    // Do not leave the input file open when the output file
                    // cannot be created.
                    in.close();
                    throw e;
                }
                break;
            default:
                System.err.println("Too many arguments. No arguments to use stdin/stdout. One argument to reading from file and write to stdout. Two arguments to read from first file and write to second.");
                System.exit(1);
                return;
        }

        try {
            ContentHandler serializer = new XmlSerializer(out);

            HtmlParser parser = new HtmlParser(XmlViolationPolicy.ALTER_INFOSET);
            parser.setErrorHandler(new SystemErrErrorHandler());
            parser.setContentHandler(serializer);
            // The serializer also receives the lexical events.
            parser.setProperty("http://xml.org/sax/properties/lexical-handler",
                    serializer);
            parser.parse(new InputSource(in));

            out.flush();
        } finally {
            // Close both ends even when parsing fails.
            try {
                out.close();
            } finally {
                in.close();
            }
        }
    }
}
@@ -0,0 +1,89 @@
/*
* Copyright (c) 2008 Mozilla Foundation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
package nu.validator.htmlparser.tools;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.MalformedURLException;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParserFactory;
import javax.xml.transform.TransformerException;
import nu.validator.htmlparser.sax.HtmlSerializer;
import nu.validator.htmlparser.sax.XmlSerializer;
import nu.validator.htmlparser.test.SystemErrErrorHandler;
import org.xml.sax.ContentHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
/**
 * Command-line demo that parses an XML document with the platform's JAXP SAX
 * parser (namespace-aware, non-validating) and writes the resulting SAX event
 * stream out through {@link HtmlSerializer}.
 */
public class XML2HTML {

    /**
     * Converts XML to HTML.
     *
     * <ul>
     * <li>No arguments: read from stdin, write to stdout.</li>
     * <li>One argument: read from the named file, write to stdout.</li>
     * <li>Two arguments: read from the first file, write to the second.</li>
     * </ul>
     *
     * With more than two arguments a usage message is printed to stderr and
     * the JVM exits with status 1. Parse errors are reported through
     * {@link SystemErrErrorHandler}.
     *
     * @param args
     *            optional input file name followed by optional output file
     *            name
     */
    public static void main(String[] args) throws SAXException,
            ParserConfigurationException, MalformedURLException, IOException,
            TransformerException {
        if (args.length > 2) {
            System.err.println("Too many arguments. No arguments to use stdin/stdout. One argument to reading from file and write to stdout. Two arguments to read from first file and write to second.");
            System.exit(1);
            return;
        }
        InputStream in = null;
        OutputStream out = null;
        try {
            if (args.length >= 1) {
                in = new FileInputStream(args[0]);
            } else {
                in = System.in;
            }
            if (args.length == 2) {
                out = new FileOutputStream(args[1]);
            } else {
                out = System.out;
            }
            ContentHandler serializer = new HtmlSerializer(out);
            // NOTE(review): the JAXP parser is used with its default DTD and
            // external-entity settings; nothing here restricts them, so this
            // tool should only be pointed at trusted input.
            SAXParserFactory factory = SAXParserFactory.newInstance();
            factory.setNamespaceAware(true);
            factory.setValidating(false);
            XMLReader parser = factory.newSAXParser().getXMLReader();
            parser.setErrorHandler(new SystemErrErrorHandler());
            parser.setContentHandler(serializer);
            // Also register the serializer as lexical handler so that
            // lexical events (comments, doctype) are passed to it.
            parser.setProperty("http://xml.org/sax/properties/lexical-handler",
                    serializer);
            parser.parse(new InputSource(in));
            out.flush();
        } finally {
            // Release both streams even when parsing or serialization fails;
            // the nested finally makes sure a failing close() on the output
            // does not prevent the input from being closed.
            try {
                if (out != null) {
                    out.close();
                }
            } finally {
                if (in != null) {
                    in.close();
                }
            }
        }
    }
}
@@ -0,0 +1,89 @@
/*
* Copyright (c) 2008 Mozilla Foundation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
package nu.validator.htmlparser.tools;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.MalformedURLException;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParserFactory;
import javax.xml.transform.TransformerException;
import nu.validator.htmlparser.sax.NameCheckingXmlSerializer;
import nu.validator.htmlparser.sax.XmlSerializer;
import nu.validator.htmlparser.test.SystemErrErrorHandler;
import org.xml.sax.ContentHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
/**
 * Command-line demo that parses an XML document with the platform's JAXP SAX
 * parser (namespace-aware, non-validating) and re-serializes it through
 * {@link NameCheckingXmlSerializer}.
 */
public class XML2XML {

    /**
     * Round-trips XML through the serializer.
     *
     * <ul>
     * <li>No arguments: read from stdin, write to stdout.</li>
     * <li>One argument: read from the named file, write to stdout.</li>
     * <li>Two arguments: read from the first file, write to the second.</li>
     * </ul>
     *
     * With more than two arguments a usage message is printed to stderr and
     * the JVM exits with status 1. Parse errors are reported through
     * {@link SystemErrErrorHandler}.
     *
     * @param args
     *            optional input file name followed by optional output file
     *            name
     */
    public static void main(String[] args) throws SAXException,
            ParserConfigurationException, MalformedURLException, IOException,
            TransformerException {
        if (args.length > 2) {
            System.err.println("Too many arguments. No arguments to use stdin/stdout. One argument to reading from file and write to stdout. Two arguments to read from first file and write to second.");
            System.exit(1);
            return;
        }
        InputStream in = null;
        OutputStream out = null;
        try {
            if (args.length >= 1) {
                in = new FileInputStream(args[0]);
            } else {
                in = System.in;
            }
            if (args.length == 2) {
                out = new FileOutputStream(args[1]);
            } else {
                out = System.out;
            }
            ContentHandler serializer = new NameCheckingXmlSerializer(out);
            SAXParserFactory factory = SAXParserFactory.newInstance();
            factory.setNamespaceAware(true);
            factory.setValidating(false);
            XMLReader parser = factory.newSAXParser().getXMLReader();
            parser.setErrorHandler(new SystemErrErrorHandler());
            parser.setContentHandler(serializer);
            // Also register the serializer as lexical handler so that
            // lexical events (comments, doctype) are passed to it.
            parser.setProperty("http://xml.org/sax/properties/lexical-handler",
                    serializer);
            parser.parse(new InputSource(in));
            out.flush();
        } finally {
            // Release both streams even when parsing or serialization fails;
            // the nested finally makes sure a failing close() on the output
            // does not prevent the input from being closed.
            try {
                if (out != null) {
                    out.close();
                }
            } finally {
                if (in != null) {
                    in.close();
                }
            }
        }
    }
}
@@ -0,0 +1,237 @@
/*
* Copyright (c) 2007 Henri Sivonen
* Copyright (c) 2007 Mozilla Foundation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
package nu.validator.htmlparser.tools;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.net.MalformedURLException;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParserFactory;
import javax.xml.transform.Templates;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.sax.SAXResult;
import javax.xml.transform.sax.SAXTransformerFactory;
import javax.xml.transform.sax.TemplatesHandler;
import javax.xml.transform.sax.TransformerHandler;
import nu.validator.htmlparser.common.XmlViolationPolicy;
import nu.validator.htmlparser.dom.HtmlDocumentBuilder;
import nu.validator.htmlparser.sax.HtmlParser;
import nu.validator.htmlparser.sax.HtmlSerializer;
import nu.validator.htmlparser.sax.XmlSerializer;
import nu.validator.htmlparser.test.SystemErrErrorHandler;
import org.w3c.dom.Document;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.ext.LexicalHandler;
/**
 * Command-line demo that applies an XSLT stylesheet to an HTML or XML input
 * file and serializes the result as HTML or XML, using the JAXP
 * transformation API.
 */
public class XSLT4HTML5 {

    /** How the input document is handed to the transformer. */
    private enum Mode {
        STREAMING_SAX, BUFFERED_SAX, DOM,
    }

    private static final String TEMPLATE = "--template=";

    private static final String INPUT_HTML = "--input-html=";

    private static final String INPUT_XML = "--input-xml=";

    private static final String OUTPUT_HTML = "--output-html=";

    private static final String OUTPUT_XML = "--output-xml=";

    private static final String MODE = "--mode=";

    /**
     * Runs the transformation described by the command-line options.
     *
     * <p>Recognized options: <code>--template=file</code>,
     * <code>--input-html=file</code> or <code>--input-xml=file</code>,
     * <code>--output-html=file</code> or <code>--output-xml=file</code>, and
     * <code>--mode=[sax-streaming|sax-buffered|dom]</code> (defaults to
     * <code>sax-buffered</code>). Arguments that match none of these prefixes
     * are ignored.
     *
     * <p>Exit statuses: 0 when called without arguments (usage is printed),
     * 1-4 when the template, input, output or mode is given twice, 5 for an
     * unrecognized mode, 6-8 when the template, input or output is missing.
     *
     * @param args
     *            the command-line options
     * @throws ParserConfigurationException
     * @throws SAXException
     * @throws IOException
     * @throws MalformedURLException
     * @throws TransformerException
     */
    public static void main(String[] args) throws SAXException,
            ParserConfigurationException, MalformedURLException, IOException, TransformerException {
        if (args.length == 0) {
            System.out.println("--template=file --input-[html|xml]=file --output-[html|xml]=file --mode=[sax-streaming|sax-buffered|dom]");
            System.exit(0);
        }

        String template = null;
        String input = null;
        boolean inputHtml = false;
        String output = null;
        boolean outputHtml = false;
        Mode mode = null;

        for (int i = 0; i < args.length; i++) {
            String arg = args[i];
            if (arg.startsWith(TEMPLATE)) {
                if (template == null) {
                    template = arg.substring(TEMPLATE.length());
                } else {
                    System.err.println("Tried to set template twice.");
                    System.exit(1);
                }
            } else if (arg.startsWith(INPUT_HTML)) {
                if (input == null) {
                    input = arg.substring(INPUT_HTML.length());
                    inputHtml = true;
                } else {
                    System.err.println("Tried to set input twice.");
                    System.exit(2);
                }
            } else if (arg.startsWith(INPUT_XML)) {
                if (input == null) {
                    input = arg.substring(INPUT_XML.length());
                    inputHtml = false;
                } else {
                    System.err.println("Tried to set input twice.");
                    System.exit(2);
                }
            } else if (arg.startsWith(OUTPUT_HTML)) {
                if (output == null) {
                    output = arg.substring(OUTPUT_HTML.length());
                    outputHtml = true;
                } else {
                    System.err.println("Tried to set output twice.");
                    System.exit(3);
                }
            } else if (arg.startsWith(OUTPUT_XML)) {
                if (output == null) {
                    output = arg.substring(OUTPUT_XML.length());
                    outputHtml = false;
                } else {
                    System.err.println("Tried to set output twice.");
                    System.exit(3);
                }
            } else if (arg.startsWith(MODE)) {
                if (mode == null) {
                    String modeStr = arg.substring(MODE.length());
                    if ("dom".equals(modeStr)) {
                        mode = Mode.DOM;
                    } else if ("sax-buffered".equals(modeStr)) {
                        mode = Mode.BUFFERED_SAX;
                    } else if ("sax-streaming".equals(modeStr)) {
                        mode = Mode.STREAMING_SAX;
                    } else {
                        System.err.println("Unrecognized mode.");
                        System.exit(5);
                    }
                } else {
                    System.err.println("Tried to set mode twice.");
                    System.exit(4);
                }
            }
        }

        if (template == null) {
            System.err.println("No template specified.");
            System.exit(6);
        }
        if (input == null) {
            System.err.println("No input specified.");
            System.exit(7);
        }
        if (output == null) {
            System.err.println("No output specified.");
            System.exit(8);
        }
        if (mode == null) {
            mode = Mode.BUFFERED_SAX;
        }

        SystemErrErrorHandler errorHandler = new SystemErrErrorHandler();

        // Namespace-aware XML reader; used to load the stylesheet and, for
        // XML input in the SAX modes, the input document as well.
        SAXParserFactory factory = SAXParserFactory.newInstance();
        factory.setNamespaceAware(true);
        factory.setValidating(false);
        XMLReader reader = factory.newSAXParser().getXMLReader();
        reader.setErrorHandler(errorHandler);

        // Compile the stylesheet by feeding its SAX events to a
        // TemplatesHandler.
        SAXTransformerFactory transformerFactory = (SAXTransformerFactory) TransformerFactory.newInstance();
        transformerFactory.setErrorListener(errorHandler);
        TemplatesHandler templatesHandler = transformerFactory.newTemplatesHandler();
        reader.setContentHandler(templatesHandler);
        reader.parse(new File(template).toURI().toASCIIString());
        Templates templates = templatesHandler.getTemplates();

        FileOutputStream outputStream = new FileOutputStream(output);
        try {
            ContentHandler serializer;
            if (outputHtml) {
                serializer = new HtmlSerializer(outputStream);
            } else {
                serializer = new XmlSerializer(outputStream);
            }
            // Content events pass through XmlnsDropper on their way to the
            // serializer; lexical events go to the serializer directly.
            SAXResult result = new SAXResult(new XmlnsDropper(serializer));
            result.setLexicalHandler((LexicalHandler) serializer);

            if (mode == Mode.DOM) {
                Document inputDoc;
                DocumentBuilder builder;
                if (inputHtml) {
                    builder = new HtmlDocumentBuilder(XmlViolationPolicy.ALTER_INFOSET);
                } else {
                    DocumentBuilderFactory builderFactory = DocumentBuilderFactory.newInstance();
                    // The DOM handed to the transformer must be
                    // namespace-aware, so the flag has to be set on the
                    // DocumentBuilderFactory that actually builds it (not on
                    // the SAX factory configured above).
                    builderFactory.setNamespaceAware(true);
                    try {
                        builder = builderFactory.newDocumentBuilder();
                    } catch (ParserConfigurationException e) {
                        throw new RuntimeException(e);
                    }
                }
                inputDoc = builder.parse(new File(input));
                DOMSource inputSource = new DOMSource(inputDoc,
                        new File(input).toURI().toASCIIString());
                Transformer transformer = templates.newTransformer();
                transformer.setErrorListener(errorHandler);
                transformer.transform(inputSource, result);
            } else {
                if (inputHtml) {
                    reader = new HtmlParser(XmlViolationPolicy.ALTER_INFOSET);
                    if (mode == Mode.STREAMING_SAX) {
                        // In streaming mode, set the HTML parser's
                        // streamability-violation policy to FATAL.
                        reader.setProperty("http://validator.nu/properties/streamability-violation-policy", XmlViolationPolicy.FATAL);
                    }
                }
                TransformerHandler transformerHandler = transformerFactory.newTransformerHandler(templates);
                transformerHandler.setResult(result);
                reader.setErrorHandler(errorHandler);
                reader.setContentHandler(transformerHandler);
                reader.setProperty("http://xml.org/sax/properties/lexical-handler", transformerHandler);
                reader.parse(new File(input).toURI().toASCIIString());
            }
            outputStream.flush();
        } finally {
            // Close the output file even when parsing or transforming fails.
            outputStream.close();
        }
    }
}
@@ -0,0 +1,162 @@
/*
* Copyright (c) 2007 Henri Sivonen
* Copyright (c) 2007 Mozilla Foundation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
package nu.validator.htmlparser.tools;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import nu.validator.htmlparser.common.XmlViolationPolicy;
import nu.validator.htmlparser.sax.HtmlSerializer;
import nu.validator.htmlparser.xom.HtmlBuilder;
import nu.xom.Builder;
import nu.xom.Document;
import nu.xom.Element;
import nu.xom.Nodes;
import nu.xom.ParsingException;
import nu.xom.Serializer;
import nu.xom.ValidityException;
import nu.xom.converters.SAXConverter;
import nu.xom.xslt.XSLException;
import nu.xom.xslt.XSLTransform;
import org.xml.sax.SAXException;
/**
 * Command-line demo that applies an XSLT stylesheet to an HTML or XML input
 * file using XOM and serializes the result as HTML or XML.
 */
public class XSLT4HTML5XOM {

    private static final String TEMPLATE = "--template=";

    private static final String INPUT_HTML = "--input-html=";

    private static final String INPUT_XML = "--input-xml=";

    private static final String OUTPUT_HTML = "--output-html=";

    private static final String OUTPUT_XML = "--output-xml=";

    /**
     * Runs the transformation described by the command-line options.
     *
     * <p>Recognized options: <code>--template=file</code>,
     * <code>--input-html=file</code> or <code>--input-xml=file</code>, and
     * <code>--output-html=file</code> or <code>--output-xml=file</code>.
     * Arguments that match none of these prefixes are ignored.
     *
     * <p>Exit statuses: 0 when called without arguments (usage is printed),
     * 1-3 when the template, input or output is given twice, 6-8 when the
     * template, input or output is missing.
     *
     * @param args
     *            the command-line options
     * @throws IOException
     * @throws ParsingException
     * @throws ValidityException
     * @throws XSLException
     * @throws SAXException
     */
    public static void main(String[] args) throws ValidityException,
            ParsingException, IOException, XSLException, SAXException {
        if (args.length == 0) {
            // This tool has no --mode option, so the usage text does not
            // advertise one.
            System.out.println("--template=file --input-[html|xml]=file --output-[html|xml]=file");
            System.exit(0);
        }

        String template = null;
        String input = null;
        boolean inputHtml = false;
        String output = null;
        boolean outputHtml = false;

        for (int i = 0; i < args.length; i++) {
            String arg = args[i];
            if (arg.startsWith(TEMPLATE)) {
                if (template == null) {
                    template = arg.substring(TEMPLATE.length());
                } else {
                    System.err.println("Tried to set template twice.");
                    System.exit(1);
                }
            } else if (arg.startsWith(INPUT_HTML)) {
                if (input == null) {
                    input = arg.substring(INPUT_HTML.length());
                    inputHtml = true;
                } else {
                    System.err.println("Tried to set input twice.");
                    System.exit(2);
                }
            } else if (arg.startsWith(INPUT_XML)) {
                if (input == null) {
                    input = arg.substring(INPUT_XML.length());
                    inputHtml = false;
                } else {
                    System.err.println("Tried to set input twice.");
                    System.exit(2);
                }
            } else if (arg.startsWith(OUTPUT_HTML)) {
                if (output == null) {
                    output = arg.substring(OUTPUT_HTML.length());
                    outputHtml = true;
                } else {
                    System.err.println("Tried to set output twice.");
                    System.exit(3);
                }
            } else if (arg.startsWith(OUTPUT_XML)) {
                if (output == null) {
                    output = arg.substring(OUTPUT_XML.length());
                    outputHtml = false;
                } else {
                    System.err.println("Tried to set output twice.");
                    System.exit(3);
                }
            }
        }

        if (template == null) {
            System.err.println("No template specified.");
            System.exit(6);
        }
        if (input == null) {
            System.err.println("No input specified.");
            System.exit(7);
        }
        if (output == null) {
            System.err.println("No output specified.");
            System.exit(8);
        }

        // The stylesheet is always XML, so it is loaded with the plain XOM
        // builder.
        Builder builder = new Builder();
        Document transformationDoc = builder.build(new File(template));
        XSLTransform transform = new XSLTransform(transformationDoc);

        if (inputHtml) {
            builder = new HtmlBuilder(XmlViolationPolicy.ALTER_INFOSET);
        }
        Document inputDoc = builder.build(new File(input));
        Nodes result = transform.transform(inputDoc);
        // Assumes the first node of the transformation result is the root
        // element of the output document.
        Document outputDoc = new Document((Element) result.get(0));

        // Open (and thereby truncate) the output file only after the input
        // has been read and transformed, so a failed parse or transform does
        // not clobber an existing output file and the output path may be the
        // same as the input path.
        FileOutputStream outputStream = new FileOutputStream(output);
        try {
            if (outputHtml) {
                HtmlSerializer htmlSerializer = new HtmlSerializer(outputStream);
                SAXConverter converter = new SAXConverter(htmlSerializer);
                converter.setLexicalHandler(htmlSerializer);
                converter.convert(outputDoc);
            } else {
                Serializer serializer = new Serializer(outputStream);
                serializer.write(outputDoc);
            }
            outputStream.flush();
        } finally {
            // Close the output file even when serialization fails.
            outputStream.close();
        }
    }
}
@@ -0,0 +1,169 @@
/*
* Copyright (c) 2007 Henri Sivonen
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
package nu.validator.htmlparser.tools;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.AttributesImpl;
/**
* Quick and dirty hack to work around Xalan xmlns weirdness.
*
* @version $Id$
* @author hsivonen
*/
class XmlnsDropper implements ContentHandler {

    /** The handler that receives every (possibly filtered) event. */
    private final ContentHandler delegate;

    /**
     * Creates a filter in front of the given handler.
     *
     * @param delegate
     *            the handler to forward events to
     */
    public XmlnsDropper(final ContentHandler delegate) {
        this.delegate = delegate;
    }

    /**
     * Forwarded unchanged.
     *
     * @throws SAXException
     * @see org.xml.sax.ContentHandler#startDocument()
     */
    public void startDocument() throws SAXException {
        delegate.startDocument();
    }

    /**
     * Forwarded unchanged.
     *
     * @throws SAXException
     * @see org.xml.sax.ContentHandler#endDocument()
     */
    public void endDocument() throws SAXException {
        delegate.endDocument();
    }

    /**
     * Forwarded unchanged; only <code>xmlns</code> <em>attributes</em> are
     * dropped by this class, not prefix mapping events.
     *
     * @param prefix
     * @param uri
     * @throws SAXException
     * @see org.xml.sax.ContentHandler#startPrefixMapping(java.lang.String, java.lang.String)
     */
    public void startPrefixMapping(String prefix, String uri) throws SAXException {
        delegate.startPrefixMapping(prefix, uri);
    }

    /**
     * Forwarded unchanged.
     *
     * @param prefix
     * @throws SAXException
     * @see org.xml.sax.ContentHandler#endPrefixMapping(java.lang.String)
     */
    public void endPrefixMapping(String prefix) throws SAXException {
        delegate.endPrefixMapping(prefix);
    }

    /**
     * Forwards the element start with a copy of the attributes from which
     * every attribute whose qualified name is <code>xmlns</code> or starts
     * with <code>xmlns:</code> has been left out.
     *
     * @param uri
     * @param localName
     * @param qName
     * @param atts
     * @throws SAXException
     * @see org.xml.sax.ContentHandler#startElement(java.lang.String, java.lang.String, java.lang.String, org.xml.sax.Attributes)
     */
    public void startElement(String uri, String localName, String qName, Attributes atts) throws SAXException {
        AttributesImpl kept = new AttributesImpl();
        for (int idx = 0; idx < atts.getLength(); idx++) {
            String attrQName = atts.getQName(idx);
            if (!isNamespaceDeclaration(attrQName)) {
                kept.addAttribute(atts.getURI(idx), atts.getLocalName(idx),
                        attrQName, atts.getType(idx), atts.getValue(idx));
            }
        }
        delegate.startElement(uri, localName, qName, kept);
    }

    /**
     * Forwarded unchanged.
     *
     * @param uri
     * @param localName
     * @param qName
     * @throws SAXException
     * @see org.xml.sax.ContentHandler#endElement(java.lang.String, java.lang.String, java.lang.String)
     */
    public void endElement(String uri, String localName, String qName) throws SAXException {
        delegate.endElement(uri, localName, qName);
    }

    /**
     * Forwarded unchanged.
     *
     * @param ch
     * @param start
     * @param length
     * @throws SAXException
     * @see org.xml.sax.ContentHandler#characters(char[], int, int)
     */
    public void characters(char[] ch, int start, int length) throws SAXException {
        delegate.characters(ch, start, length);
    }

    /**
     * Forwarded unchanged.
     *
     * @param ch
     * @param start
     * @param length
     * @throws SAXException
     * @see org.xml.sax.ContentHandler#ignorableWhitespace(char[], int, int)
     */
    public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException {
        delegate.ignorableWhitespace(ch, start, length);
    }

    /**
     * Forwarded unchanged.
     *
     * @param target
     * @param data
     * @throws SAXException
     * @see org.xml.sax.ContentHandler#processingInstruction(java.lang.String, java.lang.String)
     */
    public void processingInstruction(String target, String data) throws SAXException {
        delegate.processingInstruction(target, data);
    }

    /**
     * Forwarded unchanged.
     *
     * @param name
     * @throws SAXException
     * @see org.xml.sax.ContentHandler#skippedEntity(java.lang.String)
     */
    public void skippedEntity(String name) throws SAXException {
        delegate.skippedEntity(name);
    }

    /**
     * Forwarded unchanged.
     *
     * @param locator
     * @see org.xml.sax.ContentHandler#setDocumentLocator(org.xml.sax.Locator)
     */
    public void setDocumentLocator(Locator locator) {
        delegate.setDocumentLocator(locator);
    }

    /**
     * Tells whether an attribute's qualified name marks it as a namespace
     * declaration. A <code>null</code> name is not a declaration.
     */
    private static boolean isNamespaceDeclaration(String attrQName) {
        if (attrQName == null) {
            return false;
        }
        return "xmlns".equals(attrQName) || attrQName.startsWith("xmlns:");
    }
}
@@ -0,0 +1,29 @@
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">
<html>
<head><title>Package Overview</title>
<!--
Copyright (c) 2007 Henri Sivonen
Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the "Software"),
to deal in the Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, sublicense,
and/or sell copies of the Software, and to permit persons to whom the
Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
-->
</head>
<body bgcolor="white">
<p>Demo apps.</p>
</body>
</html>