mirror of
https://github.com/ManchildProductions/UXP-Fixed.git
synced 2026-06-13 13:28:39 +00:00
Add java htmlparser sources that match the original 52-level state
https://hg.mozilla.org/projects/htmlparser/ Commit: abe62ab2a9b69ccb3b5d8a231ec1ae11154c571d
This commit is contained in:
+115
@@ -0,0 +1,115 @@
|
||||
/*
|
||||
* Copyright (c) 2007 Henri Sivonen
|
||||
* Copyright (c) 2008 Mozilla Foundation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package nu.validator.htmlparser.test;
|
||||
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.IOException;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.CharBuffer;
|
||||
import java.nio.charset.CharsetEncoder;
|
||||
import java.nio.charset.CodingErrorAction;
|
||||
|
||||
import nu.validator.htmlparser.common.Heuristics;
|
||||
import nu.validator.htmlparser.io.Encoding;
|
||||
import nu.validator.htmlparser.io.HtmlInputStreamReader;
|
||||
|
||||
import org.xml.sax.ErrorHandler;
|
||||
import org.xml.sax.SAXException;
|
||||
|
||||
public class DecoderLoopTester {
|
||||
|
||||
private static final int LEAD_OFFSET = 0xD800 - (0x10000 >> 10);
|
||||
|
||||
private static final int NUMBER_OR_ASTRAL_CHARS = 24500;
|
||||
|
||||
private void runTest(int padding) throws SAXException, IOException {
|
||||
Encoding utf8 = Encoding.forName("UTF-8");
|
||||
char[] charArr = new char[1 + padding + 2 * NUMBER_OR_ASTRAL_CHARS];
|
||||
byte[] byteArr;
|
||||
int i = 0;
|
||||
charArr[i++] = '\uFEFF';
|
||||
for (int j = 0; j < padding; j++) {
|
||||
charArr[i++] = 'x';
|
||||
}
|
||||
for (int j = 0; j < NUMBER_OR_ASTRAL_CHARS; j++) {
|
||||
int value = 0x10000 + j;
|
||||
charArr[i++] = (char) (LEAD_OFFSET + (value >> 10));
|
||||
charArr[i++] = (char) (0xDC00 + (value & 0x3FF));
|
||||
// charArr[i++] = 'y';
|
||||
// charArr[i++] = 'z';
|
||||
|
||||
}
|
||||
CharBuffer charBuffer = CharBuffer.wrap(charArr);
|
||||
CharsetEncoder enc = utf8.newEncoder();
|
||||
enc.onMalformedInput(CodingErrorAction.REPORT);
|
||||
enc.onUnmappableCharacter(CodingErrorAction.REPORT);
|
||||
ByteBuffer byteBuffer = enc.encode(charBuffer);
|
||||
byteArr = new byte[byteBuffer.limit()];
|
||||
byteBuffer.get(byteArr);
|
||||
|
||||
ErrorHandler eh = new SystemErrErrorHandler();
|
||||
compare(new HtmlInputStreamReader(new ByteArrayInputStream(byteArr), eh, null, null, Heuristics.NONE), padding, charArr, byteArr);
|
||||
compare(new HtmlInputStreamReader(new ByteArrayInputStream(byteArr), eh, null, null, utf8), padding, charArr, byteArr);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param padding
|
||||
* @param charArr
|
||||
* @param byteArr
|
||||
* @throws SAXException
|
||||
* @throws IOException
|
||||
*/
|
||||
private void compare(HtmlInputStreamReader reader, int padding, char[] charArr, byte[] byteArr) throws SAXException, IOException {
|
||||
char[] readBuffer = new char[2048];
|
||||
int offset = 0;
|
||||
int num = 0;
|
||||
int readNum = 0;
|
||||
while ((num = reader.read(readBuffer)) != -1) {
|
||||
for (int j = 0; j < num; j++) {
|
||||
System.out.println(offset + j);
|
||||
if (readBuffer[j] != charArr[offset + j]) {
|
||||
throw new RuntimeException("Test failed. Char: " + Integer.toHexString(readBuffer[j]) + " j: " + j + " readNum: " + readNum);
|
||||
}
|
||||
}
|
||||
offset += num;
|
||||
readNum++;
|
||||
}
|
||||
}
|
||||
|
||||
void runTests() throws SAXException, IOException {
|
||||
for (int i = 0; i < 4; i++) {
|
||||
runTest(i);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @param args
|
||||
* @throws IOException
|
||||
* @throws SAXException
|
||||
*/
|
||||
public static void main(String[] args) throws IOException, SAXException {
|
||||
new DecoderLoopTester().runTests();
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,49 @@
|
||||
/*
|
||||
* Copyright (c) 2008 Mozilla Foundation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package nu.validator.htmlparser.test;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.StringReader;
|
||||
|
||||
import org.w3c.dom.Document;
|
||||
import org.xml.sax.InputSource;
|
||||
import org.xml.sax.SAXException;
|
||||
|
||||
import nu.validator.htmlparser.dom.HtmlDocumentBuilder;
|
||||
|
||||
public class DomIdTester {
|
||||
|
||||
private static final String testSrc = "<div><h1 id='bar' class='foo'>buoeoa</h1><p id='foo'>uoeuo</p></div>";
|
||||
|
||||
/**
|
||||
* @param args
|
||||
* @throws IOException
|
||||
* @throws SAXException
|
||||
*/
|
||||
public static void main(String[] args) throws SAXException, IOException {
|
||||
HtmlDocumentBuilder builder = new HtmlDocumentBuilder();
|
||||
Document doc = builder.parse(new InputSource(new StringReader(testSrc)));
|
||||
System.out.println(doc.getElementById("foo").getLocalName());
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,40 @@
|
||||
/*
|
||||
* Copyright (c) 2009 Mozilla Foundation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package nu.validator.htmlparser.test;
|
||||
|
||||
import javax.xml.parsers.DocumentBuilder;
|
||||
import javax.xml.parsers.DocumentBuilderFactory;
|
||||
|
||||
import org.w3c.dom.Document;
|
||||
import org.w3c.dom.Element;
|
||||
|
||||
/**
 * Minimal JAXP DOM experiment: creates an XHTML {@code html} element in a
 * namespace-aware document and sets an {@code xmlns:foo} attribute on it with
 * the non-namespace-aware {@code setAttribute} call.
 */
public class DomTest {

    /**
     * Builds the document and element; produces no output.
     *
     * @param args unused
     * @throws Exception if the DOM implementation rejects any step
     */
    public static void main(String[] args) throws Exception {
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        // not setting this causes pain and suffering with SVG
        factory.setNamespaceAware(true);
        DocumentBuilder docBuilder = factory.newDocumentBuilder();
        Document document = docBuilder.newDocument();
        Element html = document.createElementNS("http://www.w3.org/1999/xhtml", "html");
        html.setAttribute("xmlns:foo", "bar");
    }
}
|
||||
+123
@@ -0,0 +1,123 @@
|
||||
/*
|
||||
* Copyright (c) 2007 Henri Sivonen
|
||||
* Copyright (c) 2008 Mozilla Foundation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package nu.validator.htmlparser.test;
|
||||
|
||||
import java.io.FileInputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.nio.charset.Charset;
|
||||
|
||||
import nu.validator.htmlparser.common.Heuristics;
|
||||
import nu.validator.htmlparser.io.Encoding;
|
||||
import nu.validator.htmlparser.io.HtmlInputStreamReader;
|
||||
|
||||
import org.xml.sax.SAXException;
|
||||
|
||||
public class EncodingTester {
|
||||
|
||||
private final InputStream aggregateStream;
|
||||
|
||||
private final StringBuilder builder = new StringBuilder();
|
||||
|
||||
/**
|
||||
* @param aggregateStream
|
||||
*/
|
||||
public EncodingTester(InputStream aggregateStream) {
|
||||
this.aggregateStream = aggregateStream;
|
||||
}
|
||||
|
||||
private void runTests() throws IOException, SAXException {
|
||||
while (runTest()) {
|
||||
// spin
|
||||
}
|
||||
}
|
||||
|
||||
private boolean runTest() throws IOException, SAXException {
|
||||
if (skipLabel()) {
|
||||
return false;
|
||||
}
|
||||
UntilHashInputStream stream = new UntilHashInputStream(aggregateStream);
|
||||
HtmlInputStreamReader reader = new HtmlInputStreamReader(stream, null,
|
||||
null, null, Heuristics.NONE);
|
||||
Charset charset = reader.getCharset();
|
||||
stream.close();
|
||||
if (skipLabel()) {
|
||||
System.err.println("Premature end of test data.");
|
||||
return false;
|
||||
}
|
||||
builder.setLength(0);
|
||||
loop: for (;;) {
|
||||
int b = aggregateStream.read();
|
||||
switch (b) {
|
||||
case '\n':
|
||||
break loop;
|
||||
case -1:
|
||||
System.err.println("Premature end of test data.");
|
||||
return false;
|
||||
default:
|
||||
builder.append(((char) b));
|
||||
}
|
||||
}
|
||||
String sniffed = charset.name();
|
||||
String expected = Encoding.forName(builder.toString()).newDecoder().charset().name();
|
||||
if (expected.equalsIgnoreCase(sniffed)) {
|
||||
System.err.println("Success.");
|
||||
// System.err.println(stream);
|
||||
} else {
|
||||
System.err.println("Failure. Expected: " + expected + " got "
|
||||
+ sniffed + ".");
|
||||
System.err.println(stream);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
private boolean skipLabel() throws IOException {
|
||||
int b = aggregateStream.read();
|
||||
if (b == -1) {
|
||||
return true;
|
||||
}
|
||||
for (;;) {
|
||||
b = aggregateStream.read();
|
||||
if (b == -1) {
|
||||
return true;
|
||||
} else if (b == 0x0A) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @param args
|
||||
* @throws SAXException
|
||||
* @throws IOException
|
||||
*/
|
||||
public static void main(String[] args) throws IOException, SAXException {
|
||||
for (int i = 0; i < args.length; i++) {
|
||||
EncodingTester tester = new EncodingTester(new FileInputStream(
|
||||
args[i]));
|
||||
tester.runTests();
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
+185
@@ -0,0 +1,185 @@
|
||||
/*
|
||||
* Copyright (c) 2007 Henri Sivonen
|
||||
* Copyright (c) 2008 Mozilla Foundation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package nu.validator.htmlparser.test;
|
||||
|
||||
import nu.validator.htmlparser.common.TokenHandler;
|
||||
import nu.validator.htmlparser.impl.ElementName;
|
||||
import nu.validator.htmlparser.impl.HtmlAttributes;
|
||||
import nu.validator.htmlparser.impl.Tokenizer;
|
||||
|
||||
import org.xml.sax.ErrorHandler;
|
||||
import org.xml.sax.SAXException;
|
||||
import org.xml.sax.SAXParseException;
|
||||
|
||||
import com.sdicons.json.model.JSONArray;
|
||||
import com.sdicons.json.model.JSONBoolean;
|
||||
import com.sdicons.json.model.JSONNull;
|
||||
import com.sdicons.json.model.JSONObject;
|
||||
import com.sdicons.json.model.JSONString;
|
||||
|
||||
public class JSONArrayTokenHandler implements TokenHandler, ErrorHandler {
|
||||
|
||||
private static final JSONString DOCTYPE = new JSONString("DOCTYPE");
|
||||
|
||||
private static final JSONString START_TAG = new JSONString("StartTag");
|
||||
|
||||
private static final JSONString END_TAG = new JSONString("EndTag");
|
||||
|
||||
private static final JSONString COMMENT = new JSONString("Comment");
|
||||
|
||||
private static final JSONString CHARACTER = new JSONString("Character");
|
||||
|
||||
private static final JSONString PARSE_ERROR = new JSONString("ParseError");
|
||||
|
||||
private static final char[] REPLACEMENT_CHARACTER = { '\uFFFD' };
|
||||
|
||||
private final StringBuilder builder = new StringBuilder();
|
||||
|
||||
private JSONArray array = null;
|
||||
|
||||
private int contentModelFlag;
|
||||
|
||||
private String contentModelElement;
|
||||
|
||||
public void setContentModelFlag(int contentModelFlag, String contentModelElement) {
|
||||
this.contentModelFlag = contentModelFlag;
|
||||
this.contentModelElement = contentModelElement;
|
||||
}
|
||||
|
||||
public void characters(char[] buf, int start, int length)
|
||||
throws SAXException {
|
||||
builder.append(buf, start, length);
|
||||
}
|
||||
|
||||
private void flushCharacters() {
|
||||
if (builder.length() > 0) {
|
||||
JSONArray token = new JSONArray();
|
||||
token.getValue().add(CHARACTER);
|
||||
token.getValue().add(new JSONString(builder.toString()));
|
||||
array.getValue().add(token);
|
||||
builder.setLength(0);
|
||||
}
|
||||
}
|
||||
|
||||
public void comment(char[] buf, int start, int length) throws SAXException {
|
||||
flushCharacters();
|
||||
JSONArray token = new JSONArray();
|
||||
token.getValue().add(COMMENT);
|
||||
token.getValue().add(new JSONString(new String(buf, start, length)));
|
||||
array.getValue().add(token);
|
||||
}
|
||||
|
||||
public void doctype(String name, String publicIdentifier, String systemIdentifier, boolean forceQuirks) throws SAXException {
|
||||
flushCharacters();
|
||||
JSONArray token = new JSONArray();
|
||||
token.getValue().add(DOCTYPE);
|
||||
token.getValue().add(new JSONString(name));
|
||||
token.getValue().add(publicIdentifier == null ? JSONNull.NULL : new JSONString(publicIdentifier));
|
||||
token.getValue().add(systemIdentifier == null ? JSONNull.NULL : new JSONString(systemIdentifier));
|
||||
token.getValue().add(new JSONBoolean(!forceQuirks));
|
||||
array.getValue().add(token);
|
||||
}
|
||||
|
||||
public void endTag(ElementName eltName) throws SAXException {
|
||||
String name = eltName.name;
|
||||
flushCharacters();
|
||||
JSONArray token = new JSONArray();
|
||||
token.getValue().add(END_TAG);
|
||||
token.getValue().add(new JSONString(name));
|
||||
array.getValue().add(token);
|
||||
}
|
||||
|
||||
public void eof() throws SAXException {
|
||||
flushCharacters();
|
||||
}
|
||||
|
||||
public void startTokenization(Tokenizer self) throws SAXException {
|
||||
array = new JSONArray();
|
||||
if (contentModelElement != null) {
|
||||
self.setStateAndEndTagExpectation(contentModelFlag, contentModelElement);
|
||||
}
|
||||
}
|
||||
|
||||
public void startTag(ElementName eltName, HtmlAttributes attributes,
|
||||
boolean selfClosing) throws SAXException {
|
||||
String name = eltName.name;
|
||||
flushCharacters();
|
||||
JSONArray token = new JSONArray();
|
||||
token.getValue().add(START_TAG);
|
||||
token.getValue().add(new JSONString(name));
|
||||
JSONObject attrs = new JSONObject();
|
||||
for (int i = 0; i < attributes.getLength(); i++) {
|
||||
attrs.getValue().put(attributes.getQNameNoBoundsCheck(i),
|
||||
new JSONString(attributes.getValueNoBoundsCheck(i)));
|
||||
}
|
||||
token.getValue().add(attrs);
|
||||
if (selfClosing) {
|
||||
token.getValue().add(JSONBoolean.TRUE);
|
||||
}
|
||||
array.getValue().add(token);
|
||||
}
|
||||
|
||||
public boolean wantsComments() throws SAXException {
|
||||
return true;
|
||||
}
|
||||
|
||||
public void error(SAXParseException exception) throws SAXException {
|
||||
flushCharacters();
|
||||
array.getValue().add(PARSE_ERROR);
|
||||
}
|
||||
|
||||
public void fatalError(SAXParseException exception) throws SAXException {
|
||||
throw new RuntimeException("Should never happen.");
|
||||
}
|
||||
|
||||
public void warning(SAXParseException exception) throws SAXException {
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the array.
|
||||
*
|
||||
* @return the array
|
||||
*/
|
||||
public JSONArray getArray() {
|
||||
return array;
|
||||
}
|
||||
|
||||
public void endTokenization() throws SAXException {
|
||||
|
||||
}
|
||||
|
||||
@Override public void zeroOriginatingReplacementCharacter()
|
||||
throws SAXException {
|
||||
builder.append(REPLACEMENT_CHARACTER, 0, 1);
|
||||
}
|
||||
|
||||
@Override public boolean cdataSectionAllowed() throws SAXException {
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override public void ensureBufferSpace(int inputLength)
|
||||
throws SAXException {
|
||||
}
|
||||
|
||||
}
|
||||
+66
@@ -0,0 +1,66 @@
|
||||
/*
|
||||
* Copyright (c) 2007 Henri Sivonen
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package nu.validator.htmlparser.test;
|
||||
|
||||
import java.util.LinkedList;
|
||||
|
||||
import org.xml.sax.ErrorHandler;
|
||||
import org.xml.sax.SAXException;
|
||||
import org.xml.sax.SAXParseException;
|
||||
|
||||
/**
 * SAX {@link ErrorHandler} that collects error messages in a list and
 * remembers whether a fatal error was reported. Warnings are discarded.
 */
public class ListErrorHandler implements ErrorHandler {

    private LinkedList<String> errors = new LinkedList<String>();

    private boolean fatal = false;

    /**
     * Stores the error as {@code "<column>: <message>"}.
     */
    public void error(SAXParseException spe) throws SAXException {
        StringBuilder entry = new StringBuilder();
        entry.append(spe.getColumnNumber());
        entry.append(": ");
        entry.append(spe.getMessage());
        errors.add(entry.toString());
    }

    /**
     * Marks this handler as having seen a fatal error; the exception itself is
     * not recorded.
     */
    public void fatalError(SAXParseException arg0) throws SAXException {
        fatal = true;
    }

    /**
     * Ignores the warning.
     */
    public void warning(SAXParseException arg0) throws SAXException {
    }

    /**
     * Returns the errors.
     *
     * @return the live list of recorded error strings, in the order reported
     */
    public LinkedList<String> getErrors() {
        return errors;
    }

    /**
     * Returns the fatal.
     *
     * @return {@code true} once a fatal error has been reported
     */
    public boolean isFatal() {
        return fatal;
    }

}
|
||||
+201
@@ -0,0 +1,201 @@
|
||||
/*
|
||||
* Copyright (c) 2005, 2007 Henri Sivonen
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package nu.validator.htmlparser.test;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.OutputStreamWriter;
|
||||
import java.io.UnsupportedEncodingException;
|
||||
import java.io.Writer;
|
||||
|
||||
import javax.xml.transform.ErrorListener;
|
||||
import javax.xml.transform.SourceLocator;
|
||||
import javax.xml.transform.TransformerException;
|
||||
|
||||
import org.xml.sax.ErrorHandler;
|
||||
import org.xml.sax.SAXException;
|
||||
import org.xml.sax.SAXParseException;
|
||||
|
||||
/**
 * Reports SAX and XSLT diagnostics on {@code System.err}, encoded as UTF-8,
 * and remembers whether an error or fatal error has been seen since the last
 * {@link #reset()}.
 *
 * @version $Id$
 * @author hsivonen
 */
public class SystemErrErrorHandler implements ErrorHandler, ErrorListener {

    /** Placeholder written when an exception carries no message. */
    private static final String NO_MESSAGE = "(no message)";

    private Writer out;

    private boolean inError = false;

    /**
     * Wraps the {@code System.err} that is current at construction time in a
     * UTF-8 writer.
     */
    public SystemErrErrorHandler() {
        try {
            out = new OutputStreamWriter(System.err, "UTF-8");
        } catch (UnsupportedEncodingException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Writes one diagnostic record and flushes the writer.
     *
     * @param heading the heading line including its trailing newline
     * @param message the diagnostic text; a placeholder is written when
     *        {@code null}, because {@code Writer.write(String)} rejects null
     * @param hasLocation whether the File/Line/Col section is written
     * @param systemId the system identifier, or {@code null} for "Unknown"
     * @param line the line number to print
     * @param column the column number to print
     * @throws IOException if writing fails
     */
    private void report(String heading, String message, boolean hasLocation,
            String systemId, int line, int column) throws IOException {
        out.write(heading);
        out.write((message == null) ? NO_MESSAGE : message);
        if (hasLocation) {
            out.write("\nFile: ");
            out.write((systemId == null) ? "Unknown" : systemId);
            out.write("\nLine: ");
            out.write(Integer.toString(line));
            out.write(" Col: ");
            out.write(Integer.toString(column));
        }
        out.write("\n\n");
        out.flush();
    }

    /** Reports a SAX exception, converting I/O failures to {@code SAXException}. */
    private void reportSax(String heading, SAXParseException e) throws SAXException {
        try {
            report(heading, e.getMessage(), true, e.getSystemId(),
                    e.getLineNumber(), e.getColumnNumber());
        } catch (IOException e1) {
            throw new SAXException(e1);
        }
    }

    /**
     * Reports a transformer exception, converting I/O failures to
     * {@code TransformerException}. The location section is omitted when the
     * exception has no locator.
     */
    private void reportTransformer(String heading, TransformerException e)
            throws TransformerException {
        try {
            SourceLocator sourceLocator = e.getLocator();
            if (sourceLocator == null) {
                report(heading, e.getMessage(), false, null, 0, 0);
            } else {
                report(heading, e.getMessage(), true, sourceLocator.getSystemId(),
                        sourceLocator.getLineNumber(), sourceLocator.getColumnNumber());
            }
        } catch (IOException e1) {
            throw new TransformerException(e1);
        }
    }

    /**
     * @see org.xml.sax.ErrorHandler#warning(org.xml.sax.SAXParseException)
     */
    public void warning(SAXParseException e) throws SAXException {
        reportSax("Warning:\n", e);
    }

    /**
     * @see org.xml.sax.ErrorHandler#error(org.xml.sax.SAXParseException)
     */
    public void error(SAXParseException e) throws SAXException {
        inError = true;
        reportSax("Error:\n", e);
    }

    /**
     * @see org.xml.sax.ErrorHandler#fatalError(org.xml.sax.SAXParseException)
     */
    public void fatalError(SAXParseException e) throws SAXException {
        inError = true;
        reportSax("Fatal Error:\n", e);
    }

    /**
     * Returns the inError.
     *
     * @return {@code true} if an error or fatal error has been reported since
     *         construction or the last {@link #reset()}
     */
    public boolean isInError() {
        return inError;
    }

    /** Clears the error flag. */
    public void reset() {
        inError = false;
    }

    /**
     * @see javax.xml.transform.ErrorListener#error(javax.xml.transform.TransformerException)
     */
    public void error(TransformerException e) throws TransformerException {
        inError = true;
        reportTransformer("Error:\n", e);
    }

    /**
     * @see javax.xml.transform.ErrorListener#fatalError(javax.xml.transform.TransformerException)
     */
    public void fatalError(TransformerException e)
            throws TransformerException {
        inError = true;
        reportTransformer("Fatal Error:\n", e);
    }

    /**
     * @see javax.xml.transform.ErrorListener#warning(javax.xml.transform.TransformerException)
     */
    public void warning(TransformerException e)
            throws TransformerException {
        reportTransformer("Warning:\n", e);
    }

}
|
||||
@@ -0,0 +1,210 @@
|
||||
/*
|
||||
* Copyright (c) 2007 Henri Sivonen
|
||||
* Copyright (c) 2008 Mozilla Foundation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package nu.validator.htmlparser.test;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.OutputStreamWriter;
|
||||
import java.io.Writer;
|
||||
|
||||
import nu.validator.htmlparser.common.TokenHandler;
|
||||
import nu.validator.htmlparser.impl.ElementName;
|
||||
import nu.validator.htmlparser.impl.ErrorReportingTokenizer;
|
||||
import nu.validator.htmlparser.impl.HtmlAttributes;
|
||||
import nu.validator.htmlparser.impl.Tokenizer;
|
||||
import nu.validator.htmlparser.io.Driver;
|
||||
|
||||
import org.xml.sax.ErrorHandler;
|
||||
import org.xml.sax.InputSource;
|
||||
import org.xml.sax.SAXException;
|
||||
import org.xml.sax.SAXParseException;
|
||||
|
||||
public class TokenPrinter implements TokenHandler, ErrorHandler {
|
||||
|
||||
private final Writer writer;
|
||||
|
||||
public void characters(char[] buf, int start, int length)
|
||||
throws SAXException {
|
||||
try {
|
||||
boolean lineStarted = true;
|
||||
writer.write('-');
|
||||
for (int i = start; i < start + length; i++) {
|
||||
if (!lineStarted) {
|
||||
writer.write("\n-");
|
||||
lineStarted = true;
|
||||
}
|
||||
char c = buf[i];
|
||||
if (c == '\n') {
|
||||
writer.write("\\n");
|
||||
lineStarted = false;
|
||||
} else {
|
||||
writer.write(c);
|
||||
}
|
||||
}
|
||||
writer.write('\n');
|
||||
} catch (IOException e) {
|
||||
throw new SAXException(e);
|
||||
}
|
||||
}
|
||||
|
||||
public void comment(char[] buf, int start, int length) throws SAXException {
|
||||
try {
|
||||
writer.write('!');
|
||||
writer.write(buf, start, length);
|
||||
writer.write('\n');
|
||||
} catch (IOException e) {
|
||||
throw new SAXException(e);
|
||||
}
|
||||
}
|
||||
|
||||
public void doctype(String name, String publicIdentifier, String systemIdentifier, boolean forceQuirks) throws SAXException {
|
||||
try {
|
||||
writer.write('D');
|
||||
writer.write(name);
|
||||
writer.write(' ');
|
||||
writer.write("" + forceQuirks);
|
||||
writer.write('\n');
|
||||
} catch (IOException e) {
|
||||
throw new SAXException(e);
|
||||
}
|
||||
}
|
||||
|
||||
public void endTag(ElementName eltName) throws SAXException {
|
||||
try {
|
||||
writer.write(')');
|
||||
writer.write(eltName.name);
|
||||
writer.write('\n');
|
||||
} catch (IOException e) {
|
||||
throw new SAXException(e);
|
||||
}
|
||||
}
|
||||
|
||||
public void eof() throws SAXException {
|
||||
try {
|
||||
writer.write("E\n");
|
||||
} catch (IOException e) {
|
||||
throw new SAXException(e);
|
||||
}
|
||||
}
|
||||
|
||||
public void startTokenization(Tokenizer self) throws SAXException {
|
||||
|
||||
}
|
||||
|
||||
public void startTag(ElementName eltName, HtmlAttributes attributes, boolean selfClosing)
|
||||
throws SAXException {
|
||||
try {
|
||||
writer.write('(');
|
||||
writer.write(eltName.name);
|
||||
writer.write('\n');
|
||||
for (int i = 0; i < attributes.getLength(); i++) {
|
||||
writer.write('A');
|
||||
writer.write(attributes.getQNameNoBoundsCheck(i));
|
||||
writer.write(' ');
|
||||
writer.write(attributes.getValueNoBoundsCheck(i));
|
||||
writer.write('\n');
|
||||
}
|
||||
} catch (IOException e) {
|
||||
throw new SAXException(e);
|
||||
}
|
||||
}
|
||||
|
||||
public boolean wantsComments() throws SAXException {
|
||||
return true;
|
||||
}
|
||||
|
||||
public static void main(String[] args) throws SAXException, IOException {
|
||||
TokenPrinter printer = new TokenPrinter(new OutputStreamWriter(System.out, "UTF-8"));
|
||||
Driver tokenizer = new Driver(new ErrorReportingTokenizer(printer));
|
||||
tokenizer.setErrorHandler(printer);
|
||||
File file = new File(args[0]);
|
||||
InputSource is = new InputSource(new FileInputStream(file));
|
||||
is.setSystemId(file.toURI().toASCIIString());
|
||||
tokenizer.tokenize(is);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param writer
|
||||
*/
|
||||
public TokenPrinter(final Writer writer) {
|
||||
this.writer = writer;
|
||||
}
|
||||
|
||||
public void error(SAXParseException exception) throws SAXException {
|
||||
try {
|
||||
writer.write("R ");
|
||||
writer.write(exception.getMessage());
|
||||
writer.write("\n");
|
||||
} catch (IOException e) {
|
||||
throw new SAXException(e);
|
||||
}
|
||||
}
|
||||
|
||||
public void fatalError(SAXParseException exception) throws SAXException {
|
||||
try {
|
||||
writer.write("F ");
|
||||
writer.write(exception.getMessage());
|
||||
writer.write("\n");
|
||||
} catch (IOException e) {
|
||||
throw new SAXException(e);
|
||||
}
|
||||
}
|
||||
|
||||
public void warning(SAXParseException exception) throws SAXException {
|
||||
try {
|
||||
writer.write("W ");
|
||||
writer.write(exception.getMessage());
|
||||
writer.write("\n");
|
||||
} catch (IOException e) {
|
||||
throw new SAXException(e);
|
||||
}
|
||||
}
|
||||
|
||||
public void endTokenization() throws SAXException {
|
||||
try {
|
||||
writer.flush();
|
||||
writer.close();
|
||||
} catch (IOException e) {
|
||||
throw new SAXException(e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override public void zeroOriginatingReplacementCharacter()
|
||||
throws SAXException {
|
||||
try {
|
||||
writer.write("0\n");
|
||||
} catch (IOException e) {
|
||||
throw new SAXException(e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override public boolean cdataSectionAllowed() throws SAXException {
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override public void ensureBufferSpace(int inputLength)
|
||||
throws SAXException {
|
||||
}
|
||||
}
|
||||
+211
@@ -0,0 +1,211 @@
|
||||
/*
|
||||
* Copyright (c) 2007 Henri Sivonen
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package nu.validator.htmlparser.test;
|
||||
|
||||
import java.io.FileInputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.InputStreamReader;
|
||||
import java.io.OutputStreamWriter;
|
||||
import java.io.PrintWriter;
|
||||
import java.io.StringReader;
|
||||
import java.io.UnsupportedEncodingException;
|
||||
import java.io.Writer;
|
||||
|
||||
import nu.validator.htmlparser.common.XmlViolationPolicy;
|
||||
import nu.validator.htmlparser.impl.ErrorReportingTokenizer;
|
||||
import nu.validator.htmlparser.impl.Tokenizer;
|
||||
import nu.validator.htmlparser.io.Driver;
|
||||
|
||||
import org.xml.sax.InputSource;
|
||||
import org.xml.sax.SAXException;
|
||||
|
||||
import antlr.RecognitionException;
|
||||
import antlr.TokenStreamException;
|
||||
|
||||
import com.sdicons.json.model.JSONArray;
|
||||
import com.sdicons.json.model.JSONObject;
|
||||
import com.sdicons.json.model.JSONString;
|
||||
import com.sdicons.json.model.JSONValue;
|
||||
import com.sdicons.json.parser.JSONParser;
|
||||
|
||||
public class TokenizerTester {
|
||||
|
||||
private static JSONString PLAINTEXT = new JSONString("PLAINTEXT state");
|
||||
|
||||
private static JSONString PCDATA = new JSONString("DATA state");
|
||||
|
||||
private static JSONString RCDATA = new JSONString("RCDATA state");
|
||||
|
||||
private static JSONString RAWTEXT = new JSONString("RAWTEXT state");
|
||||
|
||||
private static boolean jsonDeepEquals(JSONValue one, JSONValue other) {
|
||||
if (one.isSimple()) {
|
||||
return one.equals(other);
|
||||
} else if (one.isArray()) {
|
||||
if (other.isArray()) {
|
||||
JSONArray oneArr = (JSONArray) one;
|
||||
JSONArray otherArr = (JSONArray) other;
|
||||
return oneArr.getValue().equals(otherArr.getValue());
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
} else if (one.isObject()) {
|
||||
if (other.isObject()) {
|
||||
JSONObject oneObject = (JSONObject) one;
|
||||
JSONObject otherObject = (JSONObject) other;
|
||||
return oneObject.getValue().equals(otherObject.getValue());
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
throw new RuntimeException("Should never happen.");
|
||||
}
|
||||
}
|
||||
|
||||
private JSONArray tests;
|
||||
|
||||
private final JSONArrayTokenHandler tokenHandler;
|
||||
|
||||
private final Driver driver;
|
||||
|
||||
private final Writer writer;
|
||||
|
||||
private TokenizerTester(InputStream stream) throws TokenStreamException,
|
||||
RecognitionException, UnsupportedEncodingException {
|
||||
tokenHandler = new JSONArrayTokenHandler();
|
||||
driver = new Driver(new ErrorReportingTokenizer(tokenHandler));
|
||||
driver.setCommentPolicy(XmlViolationPolicy.ALLOW);
|
||||
driver.setContentNonXmlCharPolicy(XmlViolationPolicy.ALLOW);
|
||||
driver.setContentSpacePolicy(XmlViolationPolicy.ALLOW);
|
||||
driver.setNamePolicy(XmlViolationPolicy.ALLOW);
|
||||
driver.setXmlnsPolicy(XmlViolationPolicy.ALLOW);
|
||||
driver.setErrorHandler(tokenHandler);
|
||||
writer = new OutputStreamWriter(System.out, "UTF-8");
|
||||
JSONParser jsonParser = new JSONParser(new InputStreamReader(stream,
|
||||
"UTF-8"));
|
||||
JSONObject obj = (JSONObject) jsonParser.nextValue();
|
||||
tests = (JSONArray) obj.get("tests");
|
||||
if (tests == null) {
|
||||
tests = (JSONArray) obj.get("xmlViolationTests");
|
||||
driver.setCommentPolicy(XmlViolationPolicy.ALTER_INFOSET);
|
||||
driver.setContentNonXmlCharPolicy(XmlViolationPolicy.ALTER_INFOSET);
|
||||
driver.setNamePolicy(XmlViolationPolicy.ALTER_INFOSET);
|
||||
driver.setXmlnsPolicy(XmlViolationPolicy.ALTER_INFOSET);
|
||||
}
|
||||
}
|
||||
|
||||
private void runTests() throws SAXException, IOException {
|
||||
for (JSONValue val : tests.getValue()) {
|
||||
runTest((JSONObject) val);
|
||||
}
|
||||
writer.flush();
|
||||
}
|
||||
|
||||
private void runTest(JSONObject test) throws SAXException, IOException {
|
||||
String inputString = ((JSONString) test.get("input")).getValue();
|
||||
JSONArray expectedTokens = (JSONArray) test.get("output");
|
||||
String description = ((JSONString) test.get("description")).getValue();
|
||||
JSONString lastStartTagJSON = ((JSONString) test.get("lastStartTag"));
|
||||
String lastStartTag = lastStartTagJSON == null ? null
|
||||
: lastStartTagJSON.getValue();
|
||||
JSONArray contentModelFlags = (JSONArray) test.get("initialStates");
|
||||
if (contentModelFlags == null) {
|
||||
runTestInner(inputString, expectedTokens, description,
|
||||
Tokenizer.DATA, null);
|
||||
} else {
|
||||
for (JSONValue value : contentModelFlags.getValue()) {
|
||||
if (PCDATA.equals(value)) {
|
||||
runTestInner(inputString, expectedTokens, description,
|
||||
Tokenizer.DATA, lastStartTag);
|
||||
} else if (RAWTEXT.equals(value)) {
|
||||
runTestInner(inputString, expectedTokens, description,
|
||||
Tokenizer.RAWTEXT, lastStartTag);
|
||||
} else if (RCDATA.equals(value)) {
|
||||
runTestInner(inputString, expectedTokens, description,
|
||||
Tokenizer.RCDATA, lastStartTag);
|
||||
} else if (PLAINTEXT.equals(value)) {
|
||||
runTestInner(inputString, expectedTokens, description,
|
||||
Tokenizer.PLAINTEXT, lastStartTag);
|
||||
} else {
|
||||
throw new RuntimeException("Broken test data.");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @param contentModelElement
|
||||
* @param contentModelFlag
|
||||
* @param test
|
||||
* @throws SAXException
|
||||
* @throws IOException
|
||||
*/
|
||||
private void runTestInner(String inputString, JSONArray expectedTokens,
|
||||
String description, int contentModelFlag,
|
||||
String contentModelElement) throws SAXException, IOException {
|
||||
tokenHandler.setContentModelFlag(contentModelFlag, contentModelElement);
|
||||
InputSource is = new InputSource(new StringReader(inputString));
|
||||
try {
|
||||
driver.tokenize(is);
|
||||
JSONArray actualTokens = tokenHandler.getArray();
|
||||
if (jsonDeepEquals(actualTokens, expectedTokens)) {
|
||||
writer.write("Success\n");
|
||||
} else {
|
||||
writer.write("Failure\n");
|
||||
writer.write(description);
|
||||
writer.write("\nInput:\n");
|
||||
writer.write(inputString);
|
||||
writer.write("\nExpected tokens:\n");
|
||||
writer.write(expectedTokens.render(false));
|
||||
writer.write("\nActual tokens:\n");
|
||||
writer.write(actualTokens.render(false));
|
||||
writer.write("\n");
|
||||
}
|
||||
} catch (Throwable t) {
|
||||
writer.write("Failure\n");
|
||||
writer.write(description);
|
||||
writer.write("\nInput:\n");
|
||||
writer.write(inputString);
|
||||
writer.write("\n");
|
||||
t.printStackTrace(new PrintWriter(writer, false));
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @param args
|
||||
* @throws RecognitionException
|
||||
* @throws TokenStreamException
|
||||
* @throws IOException
|
||||
* @throws SAXException
|
||||
*/
|
||||
public static void main(String[] args) throws TokenStreamException,
|
||||
RecognitionException, SAXException, IOException {
|
||||
for (int i = 0; i < args.length; i++) {
|
||||
TokenizerTester tester = new TokenizerTester(new FileInputStream(
|
||||
args[i]));
|
||||
tester.runTests();
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
+239
@@ -0,0 +1,239 @@
|
||||
/*
|
||||
* Copyright (c) 2007 Henri Sivonen
|
||||
* Copyright (c) 2008 Mozilla Foundation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package nu.validator.htmlparser.test;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.Writer;
|
||||
import java.util.Map;
|
||||
import java.util.TreeMap;
|
||||
|
||||
import org.xml.sax.Attributes;
|
||||
import org.xml.sax.ContentHandler;
|
||||
import org.xml.sax.Locator;
|
||||
import org.xml.sax.SAXException;
|
||||
import org.xml.sax.ext.LexicalHandler;
|
||||
|
||||
/**
 * SAX handler that writes the events it receives as an indented tree dump.
 * Every node starts on its own line with {@code "| "} followed by one space
 * per nesting level. Elements outside the XHTML namespace and attributes in
 * a namespace get a prefix word ({@code math}, {@code svg}, {@code xlink},
 * {@code xml}, {@code xmlns} or {@code otherns}). Attributes are printed
 * sorted by their dumped name.
 */
public class TreeDumpContentHandler implements ContentHandler, LexicalHandler {

    private static final String XHTML_NS = "http://www.w3.org/1999/xhtml";

    private static final String MATHML_NS = "http://www.w3.org/1998/Math/MathML";

    private static final String SVG_NS = "http://www.w3.org/2000/svg";

    private static final String XLINK_NS = "http://www.w3.org/1999/xlink";

    private static final String XML_NS = "http://www.w3.org/XML/1998/namespace";

    private static final String XMLNS_NS = "http://www.w3.org/2000/xmlns/";

    private final Writer writer;

    /** Current nesting depth; one indent unit is printed per level. */
    private int level = 0;

    /** True while a quoted text node is open and still needs its end quote. */
    private boolean inCharacters = false;

    /** Whether {@link #endDocument()} flushes and closes the writer. */
    private final boolean close;

    /**
     * @param writer
     *            destination of the dump
     * @param close
     *            whether to flush and close {@code writer} at end of document
     */
    public TreeDumpContentHandler(final Writer writer, boolean close) {
        this.writer = writer;
        this.close = close;
    }

    /**
     * Creates a handler that closes the writer at end of document.
     *
     * @param writer
     *            destination of the dump
     */
    public TreeDumpContentHandler(final Writer writer) {
        this(writer, true);
    }

    /** Ends the open text node, if any, by writing its closing quote. */
    private void finishCharacters() throws IOException {
        if (inCharacters) {
            writer.write("\"\n");
            inCharacters = false;
        }
    }

    /** Starts a new dump line: closes pending text, then writes the indent. */
    private void printLead() throws IOException {
        finishCharacters();
        writer.write("| ");
        for (int i = 0; i < level; i++) {
            writer.write(" ");
        }
    }

    /** Returns true for the XHTML {@code template} element. */
    private static boolean isTemplate(String uri, String localName) {
        return XHTML_NS.equals(uri) && "template".equals(localName);
    }

    /**
     * Computes the name an attribute is dumped under: the local name alone
     * when the attribute has no namespace, otherwise the local name prefixed
     * with a word identifying the namespace.
     */
    private static String dumpName(String ns, String localName) {
        if (XLINK_NS.equals(ns)) {
            return "xlink " + localName;
        } else if (XML_NS.equals(ns)) {
            return "xml " + localName;
        } else if (XMLNS_NS.equals(ns)) {
            return "xmlns " + localName;
        } else if ("".equals(ns)) {
            return localName;
        } else {
            return "otherns " + localName;
        }
    }

    /**
     * Appends text to the current text node, opening a quoted node first if
     * none is open. Consecutive calls therefore merge into one node.
     */
    public void characters(char[] ch, int start, int length)
            throws SAXException {
        try {
            if (!inCharacters) {
                printLead();
                writer.write('"');
                inCharacters = true;
            }
            writer.write(ch, start, length);
        } catch (IOException e) {
            throw new SAXException(e);
        }
    }

    /** Closes any open text node and steps back out of the element. */
    public void endElement(String uri, String localName, String qName)
            throws SAXException {
        try {
            finishCharacters();
            level--;
            if (isTemplate(uri, localName)) {
                // decrement level for the "content"
                level--;
            }
        } catch (IOException e) {
            throw new SAXException(e);
        }
    }

    /**
     * Prints the element line followed by one line per attribute. For an
     * XHTML {@code template} element an extra {@code content} line is
     * printed and the children are nested one level deeper.
     */
    public void startElement(String uri, String localName, String qName,
            Attributes atts) throws SAXException {
        try {
            printLead();
            writer.write('<');
            if (MATHML_NS.equals(uri)) {
                writer.write("math ");
            } else if (SVG_NS.equals(uri)) {
                writer.write("svg ");
            } else if (!XHTML_NS.equals(uri)) {
                writer.write("otherns ");
            }
            writer.write(localName);
            writer.write(">\n");
            level++;
            // A sorted map makes the attribute order deterministic.
            TreeMap<String, String> map = new TreeMap<String, String>();
            for (int i = 0; i < atts.getLength(); i++) {
                map.put(dumpName(atts.getURI(i), atts.getLocalName(i)),
                        atts.getValue(i));
            }
            for (Map.Entry<String, String> entry : map.entrySet()) {
                printLead();
                writer.write(entry.getKey());
                writer.write("=\"");
                writer.write(entry.getValue());
                writer.write("\"\n");
            }
            if (isTemplate(uri, localName)) {
                printLead();
                level++;
                writer.write("content\n");
            }
        } catch (IOException e) {
            throw new SAXException(e);
        }
    }

    /** Prints a comment node as {@code <!-- text -->}. */
    public void comment(char[] ch, int offset, int len) throws SAXException {
        try {
            printLead();
            writer.write("<!-- ");
            writer.write(ch, offset, len);
            writer.write(" -->\n");
        } catch (IOException e) {
            throw new SAXException(e);
        }
    }

    /**
     * Prints the doctype. Both identifiers are printed, each in quotes, when
     * at least one of them is non-empty. A {@code null} identifier, which SAX
     * permits, is treated as empty.
     */
    public void startDTD(String name, String publicIdentifier,
            String systemIdentifier) throws SAXException {
        String publicId = publicIdentifier == null ? "" : publicIdentifier;
        String systemId = systemIdentifier == null ? "" : systemIdentifier;
        try {
            printLead();
            writer.write("<!DOCTYPE ");
            writer.write(name);
            if (publicId.length() > 0 || systemId.length() > 0) {
                writer.write(' ');
                writer.write('\"');
                writer.write(publicId);
                writer.write('\"');
                writer.write(' ');
                writer.write('\"');
                writer.write(systemId);
                writer.write('\"');
            }
            writer.write(">\n");
        } catch (IOException e) {
            throw new SAXException(e);
        }
    }

    /**
     * Closes any open text node and, if this handler was created with
     * {@code close} set, flushes and closes the writer.
     */
    public void endDocument() throws SAXException {
        try {
            finishCharacters();
            if (close) {
                writer.flush();
                writer.close();
            }
        } catch (IOException e) {
            throw new SAXException(e);
        }
    }

    // The remaining events do not contribute to the dump.

    public void startPrefixMapping(String prefix, String uri)
            throws SAXException {
    }

    public void startEntity(String arg0) throws SAXException {
    }

    public void endCDATA() throws SAXException {
    }

    public void endDTD() throws SAXException {
    }

    public void endEntity(String arg0) throws SAXException {
    }

    public void startCDATA() throws SAXException {
    }

    public void endPrefixMapping(String prefix) throws SAXException {
    }

    public void ignorableWhitespace(char[] ch, int start, int length)
            throws SAXException {
    }

    public void processingInstruction(String target, String data)
            throws SAXException {
    }

    public void setDocumentLocator(Locator locator) {
    }

    public void skippedEntity(String name) throws SAXException {
    }

    public void startDocument() throws SAXException {
    }

}
|
||||
@@ -0,0 +1,50 @@
|
||||
/*
|
||||
* Copyright (c) 2007 Henri Sivonen
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package nu.validator.htmlparser.test;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.OutputStreamWriter;
|
||||
|
||||
import org.xml.sax.InputSource;
|
||||
import org.xml.sax.SAXException;
|
||||
|
||||
import nu.validator.htmlparser.common.XmlViolationPolicy;
|
||||
import nu.validator.htmlparser.sax.HtmlParser;
|
||||
|
||||
public class TreePrinter {
|
||||
|
||||
public static void main(String[] args) throws SAXException, IOException {
|
||||
TreeDumpContentHandler treeDumpContentHandler = new TreeDumpContentHandler(new OutputStreamWriter(System.out, "UTF-8"));
|
||||
HtmlParser htmlParser = new HtmlParser();
|
||||
htmlParser.setContentHandler(treeDumpContentHandler);
|
||||
htmlParser.setLexicalHandler(treeDumpContentHandler);
|
||||
htmlParser.setErrorHandler(new SystemErrErrorHandler());
|
||||
htmlParser.setXmlPolicy(XmlViolationPolicy.ALLOW);
|
||||
File file = new File(args[0]);
|
||||
InputSource is = new InputSource(new FileInputStream(file));
|
||||
is.setSystemId(file.toURI().toASCIIString());
|
||||
htmlParser.parse(is);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,246 @@
|
||||
/*
|
||||
* Copyright (c) 2007 Henri Sivonen
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package nu.validator.htmlparser.test;
|
||||
|
||||
import java.io.BufferedInputStream;
|
||||
import java.io.BufferedReader;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.InputStreamReader;
|
||||
import java.io.StringWriter;
|
||||
import java.util.LinkedList;
|
||||
|
||||
import nu.validator.htmlparser.common.XmlViolationPolicy;
|
||||
import nu.validator.htmlparser.sax.HtmlParser;
|
||||
|
||||
import org.xml.sax.InputSource;
|
||||
import org.xml.sax.SAXParseException;
|
||||
|
||||
public class TreeTester {
|
||||
|
||||
private final BufferedInputStream aggregateStream;
|
||||
|
||||
private boolean streaming = false;
|
||||
|
||||
/**
|
||||
* @param aggregateStream
|
||||
*/
|
||||
public TreeTester(InputStream aggregateStream) {
|
||||
this.aggregateStream = new BufferedInputStream(aggregateStream);
|
||||
}
|
||||
|
||||
private void runTests() throws Throwable {
|
||||
if (aggregateStream.read() != '#') {
|
||||
System.err.println("No hash at start!");
|
||||
return;
|
||||
}
|
||||
while (runTest()) {
|
||||
// spin
|
||||
}
|
||||
}
|
||||
|
||||
private boolean runTest() throws Throwable {
|
||||
UntilHashInputStream stream = null;
|
||||
try {
|
||||
String context = null;
|
||||
boolean scriptingEnabled = true;
|
||||
boolean hadScriptingDirective = false;
|
||||
aggregateStream.mark(12288);
|
||||
if (skipLabel()) { // #data
|
||||
return false;
|
||||
}
|
||||
stream = new UntilHashInputStream(aggregateStream);
|
||||
while (stream.read() != -1) {
|
||||
// spin
|
||||
}
|
||||
if (skipLabel()) { // #errors
|
||||
System.err.println("Premature end of test data.");
|
||||
return false;
|
||||
}
|
||||
stream = new UntilHashInputStream(aggregateStream);
|
||||
while (stream.read() != -1) {
|
||||
// spin
|
||||
}
|
||||
|
||||
StringBuilder sb = new StringBuilder();
|
||||
int c;
|
||||
while ((c = aggregateStream.read()) != '\n') {
|
||||
sb.append((char) c);
|
||||
}
|
||||
String label = sb.toString();
|
||||
if ("document-fragment".equals(label)) {
|
||||
sb.setLength(0);
|
||||
while ((c = aggregateStream.read()) != '\n') {
|
||||
sb.append((char) c);
|
||||
}
|
||||
context = sb.toString();
|
||||
// Now potentially gather #script-on/off
|
||||
sb.setLength(0);
|
||||
while ((c = aggregateStream.read()) != '\n') {
|
||||
sb.append((char) c);
|
||||
}
|
||||
label = sb.toString();
|
||||
}
|
||||
if ("script-on".equals(label)) {
|
||||
hadScriptingDirective = true;
|
||||
} else if ("script-off".equals(label)) {
|
||||
hadScriptingDirective = true;
|
||||
scriptingEnabled = false;
|
||||
}
|
||||
aggregateStream.reset();
|
||||
if (skipLabel()) { // #data
|
||||
System.err.println("Premature end of test data.");
|
||||
return false;
|
||||
}
|
||||
stream = new UntilHashInputStream(aggregateStream);
|
||||
InputSource is = new InputSource(stream);
|
||||
is.setEncoding("UTF-8");
|
||||
StringWriter sw = new StringWriter();
|
||||
ListErrorHandler leh = new ListErrorHandler();
|
||||
TreeDumpContentHandler treeDumpContentHandler = new TreeDumpContentHandler(
|
||||
sw);
|
||||
HtmlParser htmlParser = new HtmlParser(XmlViolationPolicy.ALLOW);
|
||||
if (streaming) {
|
||||
htmlParser.setStreamabilityViolationPolicy(XmlViolationPolicy.FATAL);
|
||||
}
|
||||
htmlParser.setContentHandler(treeDumpContentHandler);
|
||||
htmlParser.setLexicalHandler(treeDumpContentHandler);
|
||||
htmlParser.setErrorHandler(leh);
|
||||
htmlParser.setScriptingEnabled(scriptingEnabled);
|
||||
try {
|
||||
if (context == null) {
|
||||
htmlParser.parse(is);
|
||||
} else {
|
||||
String ns = "http://www.w3.org/1999/xhtml";
|
||||
if (context.startsWith("svg ")) {
|
||||
ns = "http://www.w3.org/2000/svg";
|
||||
context = context.substring(4);
|
||||
} else if (context.startsWith("math ")) {
|
||||
ns = "http://www.w3.org/1998/Math/MathML";
|
||||
context = context.substring(5);
|
||||
}
|
||||
htmlParser.parseFragment(is, context, ns);
|
||||
treeDumpContentHandler.endDocument();
|
||||
}
|
||||
} catch (SAXParseException e) {
|
||||
// ignore
|
||||
}
|
||||
stream.close();
|
||||
|
||||
if (skipLabel()) { // #errors
|
||||
System.err.println("Premature end of test data.");
|
||||
return false;
|
||||
}
|
||||
LinkedList<String> expectedErrors = new LinkedList<String>();
|
||||
BufferedReader br = new BufferedReader(new InputStreamReader(
|
||||
new UntilHashInputStream(aggregateStream), "UTF-8"));
|
||||
String line = null;
|
||||
while ((line = br.readLine()) != null) {
|
||||
expectedErrors.add(line);
|
||||
}
|
||||
|
||||
if (context != null) {
|
||||
if (skipLabel()) { // #document-fragment
|
||||
System.err.println("Premature end of test data.");
|
||||
return false;
|
||||
}
|
||||
UntilHashInputStream stream2 = new UntilHashInputStream(aggregateStream);
|
||||
while (stream2.read() != -1) {
|
||||
// spin
|
||||
}
|
||||
}
|
||||
if (hadScriptingDirective && skipLabel()) { // #script-on/off
|
||||
System.err.println("Premature end of test data.");
|
||||
return false;
|
||||
}
|
||||
|
||||
if (skipLabel()) { // #document
|
||||
System.err.println("Premature end of test data.");
|
||||
return false;
|
||||
}
|
||||
|
||||
StringBuilder expectedBuilder = new StringBuilder();
|
||||
br = new BufferedReader(new InputStreamReader(
|
||||
new UntilHashInputStream(aggregateStream), "UTF-8"));
|
||||
int ch;
|
||||
while ((ch = br.read()) != -1) {
|
||||
expectedBuilder.append((char)ch);
|
||||
}
|
||||
String expected = expectedBuilder.toString();
|
||||
String actual = sw.toString();
|
||||
|
||||
LinkedList<String> actualErrors = leh.getErrors();
|
||||
|
||||
if (expected.equals(actual) || (streaming && leh.isFatal()) /*
|
||||
* && expectedErrors.size() ==
|
||||
* actualErrors.size()
|
||||
*/) {
|
||||
System.err.println("Success.");
|
||||
// System.err.println(stream);
|
||||
} else {
|
||||
System.err.print("Failure.\nData:\n" + stream + "\nExpected:\n"
|
||||
+ expected + "Got: \n" + actual);
|
||||
System.err.println("Expected errors:");
|
||||
for (String err : expectedErrors) {
|
||||
System.err.println(err);
|
||||
}
|
||||
System.err.println("Actual errors:");
|
||||
for (String err : actualErrors) {
|
||||
System.err.println(err);
|
||||
}
|
||||
}
|
||||
} catch (Throwable t) {
|
||||
System.err.println("Failure.\nData:\n" + stream);
|
||||
throw t;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
private boolean skipLabel() throws IOException {
|
||||
int b = aggregateStream.read();
|
||||
if (b == -1) {
|
||||
return true;
|
||||
}
|
||||
for (;;) {
|
||||
b = aggregateStream.read();
|
||||
if (b == -1) {
|
||||
return true;
|
||||
} else if (b == 0x0A) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @param args
|
||||
* @throws Throwable
|
||||
*/
|
||||
public static void main(String[] args) throws Throwable {
|
||||
for (int i = 0; i < args.length; i++) {
|
||||
TreeTester tester = new TreeTester(new FileInputStream(args[i]));
|
||||
tester.runTests();
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
+97
@@ -0,0 +1,97 @@
|
||||
/*
|
||||
* Copyright (c) 2007 Henri Sivonen
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package nu.validator.htmlparser.test;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
|
||||
/**
 * A view over a delegate stream that reports end of stream when a
 * {@code '#'} is found either as the very first byte of the delegate or
 * directly after a line feed. The line feed that precedes the terminating
 * {@code '#'} is not returned to the reader, and the {@code '#'} itself is
 * consumed from the delegate.
 *
 * <p>The delegate is only read from; this class never closes it, so the
 * caller can keep reading from the delegate after this view has ended.
 *
 * <p>Every byte handed out by {@link #read()} is also recorded in a
 * transcript available through {@link #toString()}.
 */
public class UntilHashInputStream extends InputStream {

    /** Transcript of the bytes returned by {@link #read()}, for diagnostics. */
    private final StringBuilder builder = new StringBuilder();

    /** The underlying stream; read from but never closed by this class. */
    private final InputStream delegate;

    /** One byte of lookahead: the next byte to return, or -1 at end of delegate. */
    private int buffer = -1;

    /** Set once the terminating {@code '#'} or the end of the delegate was reached. */
    private boolean closed = false;

    /**
     * Wraps the delegate and pre-reads one byte of lookahead. If that byte
     * is {@code '#'}, the view is empty and already at end of stream.
     *
     * @param delegate the stream to read from
     * @throws IOException if reading the first byte fails
     */
    public UntilHashInputStream(final InputStream delegate) throws IOException {
        this.delegate = delegate;
        this.buffer = delegate.read();
        if (buffer == '#') {
            closed = true;
        }
    }

    /**
     * Returns the next byte of the view.
     *
     * @return the next byte, or -1 once the terminating {@code '#'} or the
     *         end of the delegate has been reached
     * @throws IOException if reading from the delegate fails
     */
    @Override
    public int read() throws IOException {
        if (closed) {
            return -1;
        }
        int rv = buffer;
        if (rv == -1) {
            // End of the delegate: finish here so that the EOF marker is
            // not written into the transcript and the delegate is not
            // read again on every subsequent call.
            closed = true;
            return -1;
        }
        buffer = delegate.read();
        if (buffer == '#' && rv == '\n') {
            // A '#' at the start of a line ends the view; the line feed
            // before it is dropped.
            closed = true;
            return -1;
        }
        if (rv >= 0x20 && rv < 0x80) {
            builder.append((char) rv);
        } else {
            // Anything outside 0x20..0x7F is shown in hex in the transcript.
            builder.append("0x");
            builder.append(Integer.toHexString(rv));
        }
        return rv;
    }

    /**
     * Ends the view. If the end has not been reached yet, the rest of the
     * data is skipped using the same rule as {@link #read()}: bytes are
     * consumed from the delegate up to and including a {@code '#'} that
     * directly follows a line feed, or up to the end of the delegate.
     * Skipped bytes are not added to the transcript. The delegate itself is
     * left open.
     *
     * @see java.io.InputStream#close()
     */
    @Override
    public void close() throws IOException {
        super.close();
        if (closed) {
            return;
        }
        // The lookahead byte counts as the byte preceding the next one read.
        int previous = buffer;
        while (previous != -1) {
            int b = delegate.read();
            if (b == '#' && previous == '\n') {
                break;
            }
            previous = b;
        }
        closed = true;
    }

    /**
     * Returns the transcript of the bytes returned by {@link #read()} so
     * far: bytes in the range 0x20..0x7F as characters, all others as
     * {@code 0x} followed by their hexadecimal value.
     *
     * @see java.lang.Object#toString()
     */
    @Override
    public String toString() {
        return builder.toString();
    }

}
|
||||
+63
@@ -0,0 +1,63 @@
|
||||
/*
|
||||
* Copyright (c) 2008 Mozilla Foundation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package nu.validator.htmlparser.test;
|
||||
|
||||
import org.xml.sax.SAXException;
|
||||
import org.xml.sax.helpers.AttributesImpl;
|
||||
|
||||
import nu.validator.htmlparser.sax.XmlSerializer;
|
||||
|
||||
public class XmlSerializerTester {
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* @param args
|
||||
* @throws SAXException
|
||||
*/
|
||||
public static void main(String[] args) throws SAXException {
|
||||
AttributesImpl attrs = new AttributesImpl();
|
||||
XmlSerializer serializer = new XmlSerializer(System.out);
|
||||
serializer.startDocument();
|
||||
serializer.startElement("1", "a", null, attrs);
|
||||
serializer.startElement("1", "b", null, attrs);
|
||||
serializer.endElement("1", "b", null);
|
||||
serializer.startElement("2", "c", null, attrs);
|
||||
serializer.endElement("2", "c", null);
|
||||
attrs.addAttribute("http://www.w3.org/1999/02/22-rdf-syntax-ns#", "about", null, "CDATA", "");
|
||||
serializer.startElement("http://www.w3.org/1999/02/22-rdf-syntax-ns#", "d", null, attrs);
|
||||
serializer.endElement("http://www.w3.org/1999/02/22-rdf-syntax-ns#", "d", null);
|
||||
serializer.startPrefixMapping("rdf", "foo");
|
||||
serializer.startElement("http://www.w3.org/1999/02/22-rdf-syntax-ns#", "e", null, attrs);
|
||||
serializer.startPrefixMapping("p0", "bar");
|
||||
serializer.startElement("http://www.w3.org/1999/02/22-rdf-syntax-ns#", "f", null, attrs);
|
||||
serializer.characters("a\uD834\uDD21a\uD834a\uDD21a".toCharArray(), 0, 8);
|
||||
serializer.endElement("http://www.w3.org/1999/02/22-rdf-syntax-ns#", "f", null);
|
||||
serializer.endElement("http://www.w3.org/1999/02/22-rdf-syntax-ns#", "e", null);
|
||||
|
||||
serializer.endPrefixMapping("rdf");
|
||||
serializer.endElement("1", "a", null);
|
||||
serializer.endDocument();
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,33 @@
|
||||
/*
|
||||
* Copyright (c) 2009 Mozilla Foundation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package nu.validator.htmlparser.test;
|
||||
|
||||
import nu.xom.Attribute;
|
||||
import nu.xom.Element;
|
||||
|
||||
public class XomTest {
|
||||
public static void main(String[] args) {
|
||||
Element elt = new Element("html", "http://www.w3.org/1999/xhtml");
|
||||
elt.addAttribute(new Attribute("xmlns:foo", "bar"));
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,29 @@
|
||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">
|
||||
<html>
|
||||
<head><title>Package Overview</title>
|
||||
<!--
|
||||
Copyright (c) 2007 Henri Sivonen
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a
|
||||
copy of this software and associated documentation files (the "Software"),
|
||||
to deal in the Software without restriction, including without limitation
|
||||
the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
and/or sell copies of the Software, and to permit persons to whom the
|
||||
Software is furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
DEALINGS IN THE SOFTWARE.
|
||||
-->
|
||||
</head>
|
||||
<body bgcolor="white">
|
||||
<p>Test drivers.</p>
|
||||
</body>
|
||||
</html>
|
||||
@@ -0,0 +1,87 @@
|
||||
/*
|
||||
* Copyright (c) 2008 Mozilla Foundation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package nu.validator.htmlparser.tools;
|
||||
|
||||
import java.io.FileInputStream;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.OutputStream;
|
||||
import java.net.MalformedURLException;
|
||||
|
||||
import javax.xml.parsers.ParserConfigurationException;
|
||||
import javax.xml.transform.TransformerException;
|
||||
|
||||
import nu.validator.htmlparser.common.XmlViolationPolicy;
|
||||
import nu.validator.htmlparser.sax.HtmlParser;
|
||||
import nu.validator.htmlparser.sax.HtmlSerializer;
|
||||
import nu.validator.htmlparser.sax.XmlSerializer;
|
||||
import nu.validator.htmlparser.test.SystemErrErrorHandler;
|
||||
|
||||
import org.xml.sax.ContentHandler;
|
||||
import org.xml.sax.InputSource;
|
||||
import org.xml.sax.SAXException;
|
||||
|
||||
public class HTML2HTML {
|
||||
|
||||
/**
|
||||
* @param args
|
||||
*/
|
||||
public static void main(String[] args) throws SAXException,
|
||||
ParserConfigurationException, MalformedURLException, IOException,
|
||||
TransformerException {
|
||||
InputStream in;
|
||||
OutputStream out;
|
||||
|
||||
switch (args.length) {
|
||||
case 0:
|
||||
in = System.in;
|
||||
out = System.out;
|
||||
break;
|
||||
case 1:
|
||||
in = new FileInputStream(args[0]);
|
||||
out = System.out;
|
||||
break;
|
||||
case 2:
|
||||
in = new FileInputStream(args[0]);
|
||||
out = new FileOutputStream(args[1]);
|
||||
break;
|
||||
default:
|
||||
System.err.println("Too many arguments. No arguments to use stdin/stdout. One argument to reading from file and write to stdout. Two arguments to read from first file and write to second.");
|
||||
System.exit(1);
|
||||
return;
|
||||
}
|
||||
|
||||
ContentHandler serializer = new HtmlSerializer(out);
|
||||
|
||||
HtmlParser parser = new HtmlParser(XmlViolationPolicy.ALLOW);
|
||||
|
||||
parser.setErrorHandler(new SystemErrErrorHandler());
|
||||
parser.setContentHandler(serializer);
|
||||
parser.setProperty("http://xml.org/sax/properties/lexical-handler",
|
||||
serializer);
|
||||
parser.parse(new InputSource(in));
|
||||
out.flush();
|
||||
out.close();
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,86 @@
|
||||
/*
|
||||
* Copyright (c) 2008 Mozilla Foundation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package nu.validator.htmlparser.tools;
|
||||
|
||||
import java.io.FileInputStream;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.OutputStream;
|
||||
import java.net.MalformedURLException;
|
||||
|
||||
import javax.xml.parsers.ParserConfigurationException;
|
||||
import javax.xml.transform.TransformerException;
|
||||
|
||||
import nu.validator.htmlparser.common.XmlViolationPolicy;
|
||||
import nu.validator.htmlparser.sax.HtmlParser;
|
||||
import nu.validator.htmlparser.sax.XmlSerializer;
|
||||
import nu.validator.htmlparser.test.SystemErrErrorHandler;
|
||||
|
||||
import org.xml.sax.ContentHandler;
|
||||
import org.xml.sax.InputSource;
|
||||
import org.xml.sax.SAXException;
|
||||
|
||||
public class HTML2XML {
|
||||
|
||||
/**
|
||||
* @param args
|
||||
*/
|
||||
public static void main(String[] args) throws SAXException,
|
||||
ParserConfigurationException, MalformedURLException, IOException,
|
||||
TransformerException {
|
||||
InputStream in;
|
||||
OutputStream out;
|
||||
|
||||
switch (args.length) {
|
||||
case 0:
|
||||
in = System.in;
|
||||
out = System.out;
|
||||
break;
|
||||
case 1:
|
||||
in = new FileInputStream(args[0]);
|
||||
out = System.out;
|
||||
break;
|
||||
case 2:
|
||||
in = new FileInputStream(args[0]);
|
||||
out = new FileOutputStream(args[1]);
|
||||
break;
|
||||
default:
|
||||
System.err.println("Too many arguments. No arguments to use stdin/stdout. One argument to reading from file and write to stdout. Two arguments to read from first file and write to second.");
|
||||
System.exit(1);
|
||||
return;
|
||||
}
|
||||
|
||||
ContentHandler serializer = new XmlSerializer(out);
|
||||
|
||||
HtmlParser parser = new HtmlParser(XmlViolationPolicy.ALTER_INFOSET);
|
||||
|
||||
parser.setErrorHandler(new SystemErrErrorHandler());
|
||||
parser.setContentHandler(serializer);
|
||||
parser.setProperty("http://xml.org/sax/properties/lexical-handler",
|
||||
serializer);
|
||||
parser.parse(new InputSource(in));
|
||||
out.flush();
|
||||
out.close();
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,89 @@
|
||||
/*
|
||||
* Copyright (c) 2008 Mozilla Foundation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package nu.validator.htmlparser.tools;
|
||||
|
||||
import java.io.FileInputStream;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.OutputStream;
|
||||
import java.net.MalformedURLException;
|
||||
|
||||
import javax.xml.parsers.ParserConfigurationException;
|
||||
import javax.xml.parsers.SAXParserFactory;
|
||||
import javax.xml.transform.TransformerException;
|
||||
|
||||
import nu.validator.htmlparser.sax.HtmlSerializer;
|
||||
import nu.validator.htmlparser.sax.XmlSerializer;
|
||||
import nu.validator.htmlparser.test.SystemErrErrorHandler;
|
||||
|
||||
import org.xml.sax.ContentHandler;
|
||||
import org.xml.sax.InputSource;
|
||||
import org.xml.sax.SAXException;
|
||||
import org.xml.sax.XMLReader;
|
||||
|
||||
public class XML2HTML {
|
||||
|
||||
/**
|
||||
* @param args
|
||||
*/
|
||||
public static void main(String[] args) throws SAXException,
|
||||
ParserConfigurationException, MalformedURLException, IOException,
|
||||
TransformerException {
|
||||
InputStream in;
|
||||
OutputStream out;
|
||||
|
||||
switch (args.length) {
|
||||
case 0:
|
||||
in = System.in;
|
||||
out = System.out;
|
||||
break;
|
||||
case 1:
|
||||
in = new FileInputStream(args[0]);
|
||||
out = System.out;
|
||||
break;
|
||||
case 2:
|
||||
in = new FileInputStream(args[0]);
|
||||
out = new FileOutputStream(args[1]);
|
||||
break;
|
||||
default:
|
||||
System.err.println("Too many arguments. No arguments to use stdin/stdout. One argument to reading from file and write to stdout. Two arguments to read from first file and write to second.");
|
||||
System.exit(1);
|
||||
return;
|
||||
}
|
||||
|
||||
ContentHandler serializer = new HtmlSerializer(out);
|
||||
|
||||
SAXParserFactory factory = SAXParserFactory.newInstance();
|
||||
factory.setNamespaceAware(true);
|
||||
factory.setValidating(false);
|
||||
XMLReader parser = factory.newSAXParser().getXMLReader();
|
||||
parser.setErrorHandler(new SystemErrErrorHandler());
|
||||
parser.setContentHandler(serializer);
|
||||
parser.setProperty("http://xml.org/sax/properties/lexical-handler",
|
||||
serializer);
|
||||
parser.parse(new InputSource(in));
|
||||
out.flush();
|
||||
out.close();
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,89 @@
|
||||
/*
|
||||
* Copyright (c) 2008 Mozilla Foundation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package nu.validator.htmlparser.tools;
|
||||
|
||||
import java.io.FileInputStream;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.OutputStream;
|
||||
import java.net.MalformedURLException;
|
||||
|
||||
import javax.xml.parsers.ParserConfigurationException;
|
||||
import javax.xml.parsers.SAXParserFactory;
|
||||
import javax.xml.transform.TransformerException;
|
||||
|
||||
import nu.validator.htmlparser.sax.NameCheckingXmlSerializer;
|
||||
import nu.validator.htmlparser.sax.XmlSerializer;
|
||||
import nu.validator.htmlparser.test.SystemErrErrorHandler;
|
||||
|
||||
import org.xml.sax.ContentHandler;
|
||||
import org.xml.sax.InputSource;
|
||||
import org.xml.sax.SAXException;
|
||||
import org.xml.sax.XMLReader;
|
||||
|
||||
public class XML2XML {
|
||||
|
||||
/**
|
||||
* @param args
|
||||
*/
|
||||
public static void main(String[] args) throws SAXException,
|
||||
ParserConfigurationException, MalformedURLException, IOException,
|
||||
TransformerException {
|
||||
InputStream in;
|
||||
OutputStream out;
|
||||
|
||||
switch (args.length) {
|
||||
case 0:
|
||||
in = System.in;
|
||||
out = System.out;
|
||||
break;
|
||||
case 1:
|
||||
in = new FileInputStream(args[0]);
|
||||
out = System.out;
|
||||
break;
|
||||
case 2:
|
||||
in = new FileInputStream(args[0]);
|
||||
out = new FileOutputStream(args[1]);
|
||||
break;
|
||||
default:
|
||||
System.err.println("Too many arguments. No arguments to use stdin/stdout. One argument to reading from file and write to stdout. Two arguments to read from first file and write to second.");
|
||||
System.exit(1);
|
||||
return;
|
||||
}
|
||||
|
||||
ContentHandler serializer = new NameCheckingXmlSerializer(out);
|
||||
|
||||
SAXParserFactory factory = SAXParserFactory.newInstance();
|
||||
factory.setNamespaceAware(true);
|
||||
factory.setValidating(false);
|
||||
XMLReader parser = factory.newSAXParser().getXMLReader();
|
||||
parser.setErrorHandler(new SystemErrErrorHandler());
|
||||
parser.setContentHandler(serializer);
|
||||
parser.setProperty("http://xml.org/sax/properties/lexical-handler",
|
||||
serializer);
|
||||
parser.parse(new InputSource(in));
|
||||
out.flush();
|
||||
out.close();
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,237 @@
|
||||
/*
|
||||
* Copyright (c) 2007 Henri Sivonen
|
||||
* Copyright (c) 2007 Mozilla Foundation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package nu.validator.htmlparser.tools;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.net.MalformedURLException;
|
||||
|
||||
import javax.xml.parsers.DocumentBuilder;
|
||||
import javax.xml.parsers.DocumentBuilderFactory;
|
||||
import javax.xml.parsers.ParserConfigurationException;
|
||||
import javax.xml.parsers.SAXParserFactory;
|
||||
import javax.xml.transform.Templates;
|
||||
import javax.xml.transform.Transformer;
|
||||
import javax.xml.transform.TransformerException;
|
||||
import javax.xml.transform.TransformerFactory;
|
||||
import javax.xml.transform.dom.DOMSource;
|
||||
import javax.xml.transform.sax.SAXResult;
|
||||
import javax.xml.transform.sax.SAXTransformerFactory;
|
||||
import javax.xml.transform.sax.TemplatesHandler;
|
||||
import javax.xml.transform.sax.TransformerHandler;
|
||||
|
||||
import nu.validator.htmlparser.common.XmlViolationPolicy;
|
||||
import nu.validator.htmlparser.dom.HtmlDocumentBuilder;
|
||||
import nu.validator.htmlparser.sax.HtmlParser;
|
||||
import nu.validator.htmlparser.sax.HtmlSerializer;
|
||||
import nu.validator.htmlparser.sax.XmlSerializer;
|
||||
import nu.validator.htmlparser.test.SystemErrErrorHandler;
|
||||
|
||||
import org.w3c.dom.Document;
|
||||
import org.xml.sax.ContentHandler;
|
||||
import org.xml.sax.SAXException;
|
||||
import org.xml.sax.XMLReader;
|
||||
import org.xml.sax.ext.LexicalHandler;
|
||||
|
||||
public class XSLT4HTML5 {
|
||||
|
||||
private enum Mode {
|
||||
STREAMING_SAX, BUFFERED_SAX, DOM,
|
||||
}
|
||||
|
||||
private static final String TEMPLATE = "--template=";
|
||||
|
||||
private static final String INPUT_HTML = "--input-html=";
|
||||
|
||||
private static final String INPUT_XML = "--input-xml=";
|
||||
|
||||
private static final String OUTPUT_HTML = "--output-html=";
|
||||
|
||||
private static final String OUTPUT_XML = "--output-xml=";
|
||||
|
||||
private static final String MODE = "--mode=";
|
||||
|
||||
/**
|
||||
* @param args
|
||||
* @throws ParserConfigurationException
|
||||
* @throws SAXException
|
||||
* @throws IOException
|
||||
* @throws MalformedURLException
|
||||
* @throws TransformerException
|
||||
*/
|
||||
public static void main(String[] args) throws SAXException,
|
||||
ParserConfigurationException, MalformedURLException, IOException, TransformerException {
|
||||
if (args.length == 0) {
|
||||
System.out.println("--template=file --input-[html|xml]=file --output-[html|xml]=file --mode=[sax-streaming|sax-buffered|dom]");
|
||||
System.exit(0);
|
||||
}
|
||||
String template = null;
|
||||
String input = null;
|
||||
boolean inputHtml = false;
|
||||
String output = null;
|
||||
boolean outputHtml = false;
|
||||
Mode mode = null;
|
||||
for (int i = 0; i < args.length; i++) {
|
||||
String arg = args[i];
|
||||
if (arg.startsWith(TEMPLATE)) {
|
||||
if (template == null) {
|
||||
template = arg.substring(TEMPLATE.length());
|
||||
} else {
|
||||
System.err.println("Tried to set template twice.");
|
||||
System.exit(1);
|
||||
}
|
||||
} else if (arg.startsWith(INPUT_HTML)) {
|
||||
if (input == null) {
|
||||
input = arg.substring(INPUT_HTML.length());
|
||||
inputHtml = true;
|
||||
} else {
|
||||
System.err.println("Tried to set input twice.");
|
||||
System.exit(2);
|
||||
}
|
||||
} else if (arg.startsWith(INPUT_XML)) {
|
||||
if (input == null) {
|
||||
input = arg.substring(INPUT_XML.length());
|
||||
inputHtml = false;
|
||||
} else {
|
||||
System.err.println("Tried to set input twice.");
|
||||
System.exit(2);
|
||||
}
|
||||
} else if (arg.startsWith(OUTPUT_HTML)) {
|
||||
if (output == null) {
|
||||
output = arg.substring(OUTPUT_HTML.length());
|
||||
outputHtml = true;
|
||||
} else {
|
||||
System.err.println("Tried to set output twice.");
|
||||
System.exit(3);
|
||||
}
|
||||
} else if (arg.startsWith(OUTPUT_XML)) {
|
||||
if (output == null) {
|
||||
output = arg.substring(OUTPUT_XML.length());
|
||||
outputHtml = false;
|
||||
} else {
|
||||
System.err.println("Tried to set output twice.");
|
||||
System.exit(3);
|
||||
}
|
||||
} else if (arg.startsWith(MODE)) {
|
||||
if (mode == null) {
|
||||
String modeStr = arg.substring(MODE.length());
|
||||
if ("dom".equals(modeStr)) {
|
||||
mode = Mode.DOM;
|
||||
} else if ("sax-buffered".equals(modeStr)) {
|
||||
mode = Mode.BUFFERED_SAX;
|
||||
} else if ("sax-streaming".equals(modeStr)) {
|
||||
mode = Mode.STREAMING_SAX;
|
||||
} else {
|
||||
System.err.println("Unrecognized mode.");
|
||||
System.exit(5);
|
||||
}
|
||||
} else {
|
||||
System.err.println("Tried to set mode twice.");
|
||||
System.exit(4);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (template == null) {
|
||||
System.err.println("No template specified.");
|
||||
System.exit(6);
|
||||
}
|
||||
if (input == null) {
|
||||
System.err.println("No input specified.");
|
||||
System.exit(7);
|
||||
}
|
||||
if (output == null) {
|
||||
System.err.println("No output specified.");
|
||||
System.exit(8);
|
||||
}
|
||||
if (mode == null) {
|
||||
mode = Mode.BUFFERED_SAX;
|
||||
}
|
||||
|
||||
SystemErrErrorHandler errorHandler = new SystemErrErrorHandler();
|
||||
|
||||
SAXParserFactory factory = SAXParserFactory.newInstance();
|
||||
factory.setNamespaceAware(true);
|
||||
factory.setValidating(false);
|
||||
XMLReader reader = factory.newSAXParser().getXMLReader();
|
||||
reader.setErrorHandler(errorHandler);
|
||||
|
||||
SAXTransformerFactory transformerFactory = (SAXTransformerFactory) TransformerFactory.newInstance();
|
||||
transformerFactory.setErrorListener(errorHandler);
|
||||
TemplatesHandler templatesHandler = transformerFactory.newTemplatesHandler();
|
||||
reader.setContentHandler(templatesHandler);
|
||||
reader.parse(new File(template).toURI().toASCIIString());
|
||||
|
||||
Templates templates = templatesHandler.getTemplates();
|
||||
|
||||
FileOutputStream outputStream = new FileOutputStream(output);
|
||||
ContentHandler serializer;
|
||||
if (outputHtml) {
|
||||
serializer = new HtmlSerializer(outputStream);
|
||||
} else {
|
||||
serializer = new XmlSerializer(outputStream);
|
||||
}
|
||||
SAXResult result = new SAXResult(new XmlnsDropper(serializer));
|
||||
result.setLexicalHandler((LexicalHandler) serializer);
|
||||
|
||||
if (mode == Mode.DOM) {
|
||||
Document inputDoc;
|
||||
DocumentBuilder builder;
|
||||
if (inputHtml) {
|
||||
builder = new HtmlDocumentBuilder(XmlViolationPolicy.ALTER_INFOSET);
|
||||
} else {
|
||||
DocumentBuilderFactory builderFactory = DocumentBuilderFactory.newInstance();
|
||||
factory.setNamespaceAware(true);
|
||||
try {
|
||||
builder = builderFactory.newDocumentBuilder();
|
||||
} catch (ParserConfigurationException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
inputDoc = builder.parse(new File(input));
|
||||
DOMSource inputSource = new DOMSource(inputDoc,
|
||||
new File(input).toURI().toASCIIString());
|
||||
Transformer transformer = templates.newTransformer();
|
||||
transformer.setErrorListener(errorHandler);
|
||||
transformer.transform(inputSource, result);
|
||||
} else {
|
||||
if (inputHtml) {
|
||||
reader = new HtmlParser(XmlViolationPolicy.ALTER_INFOSET);
|
||||
if (mode == Mode.STREAMING_SAX) {
|
||||
reader.setProperty("http://validator.nu/properties/streamability-violation-policy", XmlViolationPolicy.FATAL);
|
||||
}
|
||||
}
|
||||
TransformerHandler transformerHandler = transformerFactory.newTransformerHandler(templates);
|
||||
transformerHandler.setResult(result);
|
||||
reader.setErrorHandler(errorHandler);
|
||||
reader.setContentHandler(transformerHandler);
|
||||
reader.setProperty("http://xml.org/sax/properties/lexical-handler", transformerHandler);
|
||||
reader.parse(new File(input).toURI().toASCIIString());
|
||||
}
|
||||
outputStream.flush();
|
||||
outputStream.close();
|
||||
}
|
||||
|
||||
}
|
||||
+162
@@ -0,0 +1,162 @@
|
||||
/*
|
||||
* Copyright (c) 2007 Henri Sivonen
|
||||
* Copyright (c) 2007 Mozilla Foundation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package nu.validator.htmlparser.tools;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
|
||||
import nu.validator.htmlparser.common.XmlViolationPolicy;
|
||||
import nu.validator.htmlparser.sax.HtmlSerializer;
|
||||
import nu.validator.htmlparser.xom.HtmlBuilder;
|
||||
import nu.xom.Builder;
|
||||
import nu.xom.Document;
|
||||
import nu.xom.Element;
|
||||
import nu.xom.Nodes;
|
||||
import nu.xom.ParsingException;
|
||||
import nu.xom.Serializer;
|
||||
import nu.xom.ValidityException;
|
||||
import nu.xom.converters.SAXConverter;
|
||||
import nu.xom.xslt.XSLException;
|
||||
import nu.xom.xslt.XSLTransform;
|
||||
|
||||
import org.xml.sax.SAXException;
|
||||
|
||||
public class XSLT4HTML5XOM {
|
||||
|
||||
private static final String TEMPLATE = "--template=";
|
||||
|
||||
private static final String INPUT_HTML = "--input-html=";
|
||||
|
||||
private static final String INPUT_XML = "--input-xml=";
|
||||
|
||||
private static final String OUTPUT_HTML = "--output-html=";
|
||||
|
||||
private static final String OUTPUT_XML = "--output-xml=";
|
||||
|
||||
/**
|
||||
* @param args
|
||||
* @throws IOException
|
||||
* @throws ParsingException
|
||||
* @throws ValidityException
|
||||
* @throws XSLException
|
||||
* @throws SAXException
|
||||
*/
|
||||
public static void main(String[] args) throws ValidityException,
|
||||
ParsingException, IOException, XSLException, SAXException {
|
||||
if (args.length == 0) {
|
||||
System.out.println("--template=file --input-[html|xml]=file --output-[html|xml]=file --mode=[sax-streaming|sax-buffered|dom]");
|
||||
System.exit(0);
|
||||
}
|
||||
String template = null;
|
||||
String input = null;
|
||||
boolean inputHtml = false;
|
||||
String output = null;
|
||||
boolean outputHtml = false;
|
||||
for (int i = 0; i < args.length; i++) {
|
||||
String arg = args[i];
|
||||
if (arg.startsWith(TEMPLATE)) {
|
||||
if (template == null) {
|
||||
template = arg.substring(TEMPLATE.length());
|
||||
} else {
|
||||
System.err.println("Tried to set template twice.");
|
||||
System.exit(1);
|
||||
}
|
||||
} else if (arg.startsWith(INPUT_HTML)) {
|
||||
if (input == null) {
|
||||
input = arg.substring(INPUT_HTML.length());
|
||||
inputHtml = true;
|
||||
} else {
|
||||
System.err.println("Tried to set input twice.");
|
||||
System.exit(2);
|
||||
}
|
||||
} else if (arg.startsWith(INPUT_XML)) {
|
||||
if (input == null) {
|
||||
input = arg.substring(INPUT_XML.length());
|
||||
inputHtml = false;
|
||||
} else {
|
||||
System.err.println("Tried to set input twice.");
|
||||
System.exit(2);
|
||||
}
|
||||
} else if (arg.startsWith(OUTPUT_HTML)) {
|
||||
if (output == null) {
|
||||
output = arg.substring(OUTPUT_HTML.length());
|
||||
outputHtml = true;
|
||||
} else {
|
||||
System.err.println("Tried to set output twice.");
|
||||
System.exit(3);
|
||||
}
|
||||
} else if (arg.startsWith(OUTPUT_XML)) {
|
||||
if (output == null) {
|
||||
output = arg.substring(OUTPUT_XML.length());
|
||||
outputHtml = false;
|
||||
} else {
|
||||
System.err.println("Tried to set output twice.");
|
||||
System.exit(3);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (template == null) {
|
||||
System.err.println("No template specified.");
|
||||
System.exit(6);
|
||||
}
|
||||
if (input == null) {
|
||||
System.err.println("No input specified.");
|
||||
System.exit(7);
|
||||
}
|
||||
if (output == null) {
|
||||
System.err.println("No output specified.");
|
||||
System.exit(8);
|
||||
}
|
||||
|
||||
Builder builder = new Builder();
|
||||
|
||||
Document transformationDoc = builder.build(new File(template));
|
||||
|
||||
XSLTransform transform = new XSLTransform(transformationDoc);
|
||||
|
||||
FileOutputStream outputStream = new FileOutputStream(output);
|
||||
|
||||
Document inputDoc;
|
||||
if (inputHtml) {
|
||||
builder = new HtmlBuilder(XmlViolationPolicy.ALTER_INFOSET);
|
||||
}
|
||||
inputDoc = builder.build(new File(input));
|
||||
Nodes result = transform.transform(inputDoc);
|
||||
Document outputDoc = new Document((Element) result.get(0));
|
||||
if (outputHtml) {
|
||||
HtmlSerializer htmlSerializer = new HtmlSerializer(outputStream);
|
||||
SAXConverter converter = new SAXConverter(htmlSerializer);
|
||||
converter.setLexicalHandler(htmlSerializer);
|
||||
converter.convert(outputDoc);
|
||||
} else {
|
||||
Serializer serializer = new Serializer(outputStream);
|
||||
serializer.write(outputDoc);
|
||||
}
|
||||
outputStream.flush();
|
||||
outputStream.close();
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,169 @@
|
||||
/*
|
||||
* Copyright (c) 2007 Henri Sivonen
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package nu.validator.htmlparser.tools;
|
||||
|
||||
import org.xml.sax.Attributes;
|
||||
import org.xml.sax.ContentHandler;
|
||||
import org.xml.sax.Locator;
|
||||
import org.xml.sax.SAXException;
|
||||
import org.xml.sax.helpers.AttributesImpl;
|
||||
|
||||
/**
|
||||
* Quick and dirty hack to work around Xalan xmlns weirdness.
|
||||
*
|
||||
* @version $Id$
|
||||
* @author hsivonen
|
||||
*/
|
||||
class XmlnsDropper implements ContentHandler {

    /** Handler that receives every event after filtering. */
    private final ContentHandler next;

    /**
     * Creates a filter that relays SAX content events to another handler.
     *
     * @param delegate the handler the events are passed on to
     */
    public XmlnsDropper(final ContentHandler delegate) {
        this.next = delegate;
    }

    /**
     * Tells whether a qualified attribute name is <code>xmlns</code> or
     * starts with <code>xmlns:</code>.
     *
     * @param qName the qualified name to test; may be <code>null</code>
     * @return <code>true</code> if the attribute is to be dropped
     */
    private static boolean isNamespaceDeclaration(String qName) {
        if (qName == null) {
            return false;
        }
        return qName.equals("xmlns") || qName.startsWith("xmlns:");
    }

    /**
     * Passes the locator on unchanged.
     *
     * @param locator the document locator
     * @see org.xml.sax.ContentHandler#setDocumentLocator(org.xml.sax.Locator)
     */
    public void setDocumentLocator(Locator locator) {
        next.setDocumentLocator(locator);
    }

    /**
     * Passes the event on unchanged.
     *
     * @throws SAXException if the delegate throws
     * @see org.xml.sax.ContentHandler#startDocument()
     */
    public void startDocument() throws SAXException {
        next.startDocument();
    }

    /**
     * Passes the event on unchanged.
     *
     * @throws SAXException if the delegate throws
     * @see org.xml.sax.ContentHandler#endDocument()
     */
    public void endDocument() throws SAXException {
        next.endDocument();
    }

    /**
     * Passes the prefix mapping on unchanged.
     *
     * @param prefix the prefix being declared
     * @param uri the namespace URI the prefix maps to
     * @throws SAXException if the delegate throws
     * @see org.xml.sax.ContentHandler#startPrefixMapping(java.lang.String, java.lang.String)
     */
    public void startPrefixMapping(String prefix, String uri) throws SAXException {
        next.startPrefixMapping(prefix, uri);
    }

    /**
     * Passes the end of the prefix mapping on unchanged.
     *
     * @param prefix the prefix going out of scope
     * @throws SAXException if the delegate throws
     * @see org.xml.sax.ContentHandler#endPrefixMapping(java.lang.String)
     */
    public void endPrefixMapping(String prefix) throws SAXException {
        next.endPrefixMapping(prefix);
    }

    /**
     * Relays the element start with a fresh attribute list from which every
     * attribute whose qualified name is <code>xmlns</code> or begins with
     * <code>xmlns:</code> has been left out. The remaining attributes keep
     * their relative order.
     *
     * @param uri the namespace URI of the element
     * @param localName the local name of the element
     * @param qName the qualified name of the element
     * @param atts the attributes as reported by the upstream source
     * @throws SAXException if the delegate throws
     * @see org.xml.sax.ContentHandler#startElement(java.lang.String, java.lang.String, java.lang.String, org.xml.sax.Attributes)
     */
    public void startElement(String uri, String localName, String qName, Attributes atts) throws SAXException {
        AttributesImpl kept = new AttributesImpl();
        int index = 0;
        while (index < atts.getLength()) {
            String attrQName = atts.getQName(index);
            if (!isNamespaceDeclaration(attrQName)) {
                kept.addAttribute(atts.getURI(index), atts.getLocalName(index),
                        attrQName, atts.getType(index), atts.getValue(index));
            }
            index++;
        }
        next.startElement(uri, localName, qName, kept);
    }

    /**
     * Passes the element end on unchanged.
     *
     * @param uri the namespace URI of the element
     * @param localName the local name of the element
     * @param qName the qualified name of the element
     * @throws SAXException if the delegate throws
     * @see org.xml.sax.ContentHandler#endElement(java.lang.String, java.lang.String, java.lang.String)
     */
    public void endElement(String uri, String localName, String qName) throws SAXException {
        next.endElement(uri, localName, qName);
    }

    /**
     * Passes character data on unchanged.
     *
     * @param ch the character buffer
     * @param start the offset of the first character
     * @param length the number of characters
     * @throws SAXException if the delegate throws
     * @see org.xml.sax.ContentHandler#characters(char[], int, int)
     */
    public void characters(char[] ch, int start, int length) throws SAXException {
        next.characters(ch, start, length);
    }

    /**
     * Passes ignorable whitespace on unchanged.
     *
     * @param ch the character buffer
     * @param start the offset of the first character
     * @param length the number of characters
     * @throws SAXException if the delegate throws
     * @see org.xml.sax.ContentHandler#ignorableWhitespace(char[], int, int)
     */
    public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException {
        next.ignorableWhitespace(ch, start, length);
    }

    /**
     * Passes the processing instruction on unchanged.
     *
     * @param target the processing instruction target
     * @param data the processing instruction data
     * @throws SAXException if the delegate throws
     * @see org.xml.sax.ContentHandler#processingInstruction(java.lang.String, java.lang.String)
     */
    public void processingInstruction(String target, String data) throws SAXException {
        next.processingInstruction(target, data);
    }

    /**
     * Passes the skipped entity on unchanged.
     *
     * @param name the name of the skipped entity
     * @throws SAXException if the delegate throws
     * @see org.xml.sax.ContentHandler#skippedEntity(java.lang.String)
     */
    public void skippedEntity(String name) throws SAXException {
        next.skippedEntity(name);
    }

}
|
||||
@@ -0,0 +1,29 @@
|
||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">
|
||||
<html>
|
||||
<head><title>Package Overview</title>
|
||||
<!--
|
||||
Copyright (c) 2007 Henri Sivonen
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a
|
||||
copy of this software and associated documentation files (the "Software"),
|
||||
to deal in the Software without restriction, including without limitation
|
||||
the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
and/or sell copies of the Software, and to permit persons to whom the
|
||||
Software is furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
DEALINGS IN THE SOFTWARE.
|
||||
-->
|
||||
</head>
|
||||
<body bgcolor="white">
|
||||
<p>Demo apps.</p>
|
||||
</body>
|
||||
</html>
|
||||
Reference in New Issue
Block a user