1
0
mirror of https://github.com/roytam1/UXP.git synced 2026-05-26 22:48:47 +00:00

Fix an issue with the html5 tokenizer and tree builder (java htmlparser)

This commit is contained in:
Matt A. Tobin
2020-01-15 22:07:59 -05:00
committed by Roy Tam
parent 7938a0ac1b
commit 3338198f76
2 changed files with 35 additions and 18 deletions
@@ -680,6 +680,22 @@ public class Tokenizer implements Locator {
*
* @param specialTokenizerState
* the tokenizer state to set
*/
public void setState(int specialTokenizerState) {
    // This overload takes no element name, so clear the end tag
    // expectation in both of its forms (the name and its array form).
    endTagExpectationAsArray = null;
    endTagExpectation = null;
    // Store the requested tokenizer state in stateSave.
    stateSave = specialTokenizerState;
}
// [NOCPP[
/**
* Sets the tokenizer state and the associated element name. This should
* only ever be used to put the tokenizer into one of the states that have
* a special end tag expectation. For use from the tokenizer test harness.
*
* @param specialTokenizerState
* the tokenizer state to set
* @param endTagExpectation
* the expected end tag for transitioning back to normal
*/
@@ -695,6 +711,8 @@ public class Tokenizer implements Locator {
endTagExpectationToArray();
}
// ]NOCPP]
/**
* Sets the tokenizer state and the associated element name. This should
* only ever be used to put the tokenizer into one of the states that have
@@ -3749,11 +3767,17 @@ public class Tokenizer implements Locator {
c = checkChar(buf, pos);
/*
* ASSERT! when entering this state, set index to 0 and
* call clearStrBufBeforeUse() assert (contentModelElement !=
* null); Let's implement the above without lookahead.
* strBuf is the 'temporary buffer'.
* call clearStrBufBeforeUse(); Let's implement the above
* without lookahead. strBuf is the 'temporary buffer'.
*/
if (index < endTagExpectationAsArray.length) {
if (endTagExpectationAsArray == null) {
tokenHandler.characters(Tokenizer.LT_SOLIDUS,
0, 2);
cstart = pos;
reconsume = true;
state = transition(state, returnState, reconsume, pos);
continue stateloop;
} else if (index < endTagExpectationAsArray.length) {
char e = endTagExpectationAsArray[index];
char folded = c;
if (c >= 'A' && c <= 'Z') {
@@ -640,8 +640,7 @@ public abstract class TreeBuilder<T> implements TokenHandler,
);
currentPtr++;
stack[currentPtr] = node;
tokenizer.setStateAndEndTagExpectation(Tokenizer.DATA,
contextName);
tokenizer.setState(Tokenizer.DATA);
// The frameset-ok flag is set even though <frameset> never
// ends up being allowed as HTML frameset in the fragment case.
mode = FRAMESET_OK;
@@ -671,8 +670,7 @@ public abstract class TreeBuilder<T> implements TokenHandler,
);
currentPtr++;
stack[currentPtr] = node;
tokenizer.setStateAndEndTagExpectation(Tokenizer.DATA,
contextName);
tokenizer.setState(Tokenizer.DATA);
// The frameset-ok flag is set even though <frameset> never
// ends up being allowed as HTML frameset in the fragment case.
mode = FRAMESET_OK;
@@ -691,23 +689,18 @@ public abstract class TreeBuilder<T> implements TokenHandler,
resetTheInsertionMode();
formPointer = getFormPointerForContext(contextNode);
if ("title" == contextName || "textarea" == contextName) {
tokenizer.setStateAndEndTagExpectation(Tokenizer.RCDATA,
contextName);
tokenizer.setState(Tokenizer.RCDATA);
} else if ("style" == contextName || "xmp" == contextName
|| "iframe" == contextName || "noembed" == contextName
|| "noframes" == contextName
|| (scriptingEnabled && "noscript" == contextName)) {
tokenizer.setStateAndEndTagExpectation(Tokenizer.RAWTEXT,
contextName);
tokenizer.setState(Tokenizer.RAWTEXT);
} else if ("plaintext" == contextName) {
tokenizer.setStateAndEndTagExpectation(Tokenizer.PLAINTEXT,
contextName);
tokenizer.setState(Tokenizer.PLAINTEXT);
} else if ("script" == contextName) {
tokenizer.setStateAndEndTagExpectation(
Tokenizer.SCRIPT_DATA, contextName);
tokenizer.setState(Tokenizer.SCRIPT_DATA);
} else {
tokenizer.setStateAndEndTagExpectation(Tokenizer.DATA,
contextName);
tokenizer.setState(Tokenizer.DATA);
}
}
contextName = null;