diff --git a/dom/base/DOMCursor.cpp b/dom/base/DOMCursor.cpp index 5eaf562648..28af38fa9c 100644 --- a/dom/base/DOMCursor.cpp +++ b/dom/base/DOMCursor.cpp @@ -27,6 +27,13 @@ DOMCursor::DOMCursor(nsPIDOMWindow* aWindow, nsICursorContinueCallback* aCallbac { } +DOMCursor::DOMCursor(nsIGlobalObject* aGlobal, nsICursorContinueCallback* aCallback) + : DOMRequest(aGlobal) + , mCallback(aCallback) + , mFinished(false) +{ +} + void DOMCursor::Reset() { diff --git a/dom/base/DOMCursor.h b/dom/base/DOMCursor.h index 4217b4ca68..108d2fb7e6 100644 --- a/dom/base/DOMCursor.h +++ b/dom/base/DOMCursor.h @@ -26,6 +26,7 @@ public: DOMRequest) DOMCursor(nsPIDOMWindow* aWindow, nsICursorContinueCallback *aCallback); + DOMCursor(nsIGlobalObject* aGlobal, nsICursorContinueCallback *aCallback); virtual JSObject* WrapObject(JSContext* aCx, JS::Handle aGivenProto) override; diff --git a/dom/base/DOMRequest.cpp b/dom/base/DOMRequest.cpp index 7a1461b461..a3949d5298 100644 --- a/dom/base/DOMRequest.cpp +++ b/dom/base/DOMRequest.cpp @@ -20,7 +20,7 @@ using mozilla::dom::DOMRequest; using mozilla::dom::DOMRequestService; using mozilla::dom::DOMCursor; using mozilla::dom::Promise; -using mozilla::AutoSafeJSContext; +using mozilla::dom::AutoJSAPI; DOMRequest::DOMRequest(nsPIDOMWindow* aWindow) : DOMEventTargetHelper(aWindow->IsInnerWindow() ? 
@@ -30,6 +30,13 @@ DOMRequest::DOMRequest(nsPIDOMWindow* aWindow) { } +DOMRequest::DOMRequest(nsIGlobalObject* aGlobal) + : DOMEventTargetHelper(aGlobal) + , mResult(JS::UndefinedValue()) + , mDone(false) +{ +} + DOMRequest::~DOMRequest() { mResult.setUndefined(); @@ -234,6 +241,7 @@ NS_IMETHODIMP DOMRequestService::CreateRequest(nsIDOMWindow* aWindow, nsIDOMDOMRequest** aRequest) { + MOZ_ASSERT(NS_IsMainThread()); nsCOMPtr win(do_QueryInterface(aWindow)); NS_ENSURE_STATE(win); NS_ADDREF(*aRequest = new DOMRequest(win)); @@ -305,13 +313,9 @@ public: Dispatch(DOMRequest* aRequest, const JS::Value& aResult) { - NS_ASSERTION(NS_IsMainThread(), "Wrong thread!"); mozilla::ThreadsafeAutoSafeJSContext cx; nsRefPtr asyncTask = new FireSuccessAsyncTask(cx, aRequest, aResult); - if (NS_FAILED(NS_DispatchToMainThread(asyncTask))) { - NS_WARNING("Failed to dispatch to main thread!"); - return NS_ERROR_FAILURE; - } + MOZ_ALWAYS_TRUE(NS_SUCCEEDED(NS_DispatchToCurrentThread(asyncTask))); return NS_OK; } @@ -322,11 +326,6 @@ public: return NS_OK; } - ~FireSuccessAsyncTask() - { - NS_ASSERTION(NS_IsMainThread(), "Wrong thread!"); - } - private: nsRefPtr mReq; JS::PersistentRooted mResult; @@ -368,10 +367,7 @@ DOMRequestService::FireErrorAsync(nsIDOMDOMRequest* aRequest, NS_ENSURE_STATE(aRequest); nsCOMPtr asyncTask = new FireErrorAsyncTask(static_cast(aRequest), aError); - if (NS_FAILED(NS_DispatchToMainThread(asyncTask))) { - NS_WARNING("Failed to dispatch to main thread!"); - return NS_ERROR_FAILURE; - } + MOZ_ALWAYS_TRUE(NS_SUCCEEDED(NS_DispatchToCurrentThread(asyncTask))); return NS_OK; } diff --git a/dom/base/DOMRequest.h b/dom/base/DOMRequest.h index 8989343eea..21f8a1f231 100644 --- a/dom/base/DOMRequest.h +++ b/dom/base/DOMRequest.h @@ -84,6 +84,7 @@ public: void FireDetailedError(DOMError* aError); explicit DOMRequest(nsPIDOMWindow* aWindow); + explicit DOMRequest(nsIGlobalObject* aGlobal); protected: virtual ~DOMRequest(); diff --git a/dom/base/nsContentPolicy.cpp 
b/dom/base/nsContentPolicy.cpp index 1041bcfb94..73a9f547af 100644 --- a/dom/base/nsContentPolicy.cpp +++ b/dom/base/nsContentPolicy.cpp @@ -145,12 +145,26 @@ nsContentPolicy::CheckPolicy(CPMethod policyMethod, /* check the appropriate policy */ // Send the internal content policy type to the mixed content blocker // which needs to know about TYPE_INTERNAL_WORKER, - // TYPE_INTERNAL_SHARED_WORKER and TYPE_INTERNAL_SERVICE_WORKER. + // TYPE_INTERNAL_SHARED_WORKER and TYPE_INTERNAL_SERVICE_WORKER + // and also preloads: TYPE_INTERNAL_SCRIPT_PRELOAD, + // TYPE_INTERNAL_IMAGE_PRELOAD, TYPE_INTERNAL_STYLESHEET_PRELOAD bool isMixedContentBlocker = mixedContentBlocker == entries[i]; nsContentPolicyType type = externalType; if (isMixedContentBlocker) { type = externalTypeOrMCBInternal; } + // Send the internal content policy type for CSP which needs to + // know about preloads and workers, in particular: + // * TYPE_INTERNAL_SCRIPT_PRELOAD + // * TYPE_INTERNAL_IMAGE_PRELOAD + // * TYPE_INTERNAL_STYLESHEET_PRELOAD + // * TYPE_INTERNAL_WORKER + // * TYPE_INTERNAL_SHARED_WORKER + // * TYPE_INTERNAL_SERVICE_WORKER + bool isCSP = cspService == entries[i]; + if (isCSP) { + type = externalTypeOrCSPInternal; + } rv = (entries[i]->*policyMethod)(type, contentLocation, requestingLocation, requestingContext, mimeType, extra, requestPrincipal, @@ -180,7 +194,7 @@ nsContentPolicy::CheckPolicy(CPMethod policyMethod, if (topFrameElement) { nsCOMPtr topWindow = window->GetScriptableTop(); - isTopLevel = topWindow == static_cast(window); + isTopLevel = topWindow == window; } else { // If we don't have a top frame element, then requestingContext is // part of the top-level XUL document. 
Presumably it's the diff --git a/dom/base/nsDataDocumentContentPolicy.cpp b/dom/base/nsDataDocumentContentPolicy.cpp index a25d48e78c..bd33ee34d7 100644 --- a/dom/base/nsDataDocumentContentPolicy.cpp +++ b/dom/base/nsDataDocumentContentPolicy.cpp @@ -83,12 +83,12 @@ nsDataDocumentContentPolicy::ShouldLoad(uint32_t aContentType, // OR // - URI loadable by subsumers, e.g. blob URIs // Any URI that doesn't meet these requirements will be rejected below. - if (!HasFlags(aContentLocation, - nsIProtocolHandler::URI_IS_LOCAL_RESOURCE) || - (!HasFlags(aContentLocation, - nsIProtocolHandler::URI_INHERITS_SECURITY_CONTEXT) && - !HasFlags(aContentLocation, - nsIProtocolHandler::URI_LOADABLE_BY_SUBSUMERS))) { + if (!(HasFlags(aContentLocation, + nsIProtocolHandler::URI_IS_LOCAL_RESOURCE) && + (HasFlags(aContentLocation, + nsIProtocolHandler::URI_INHERITS_SECURITY_CONTEXT) || + HasFlags(aContentLocation, + nsIProtocolHandler::URI_LOADABLE_BY_SUBSUMERS)))) { *aDecision = nsIContentPolicy::REJECT_TYPE; // Report error, if we can. @@ -99,7 +99,7 @@ nsDataDocumentContentPolicy::ShouldLoad(uint32_t aContentType, requestingPrincipal->GetURI(getter_AddRefs(principalURI)); if (NS_SUCCEEDED(rv) && principalURI) { nsScriptSecurityManager::ReportError( - nullptr, NS_LITERAL_STRING("CheckSameOriginError"), principalURI, + nullptr, NS_LITERAL_STRING("ExternalDataError"), principalURI, aContentLocation); } } diff --git a/dom/locales/en-US/chrome/security/caps.properties b/dom/locales/en-US/chrome/security/caps.properties index 176286f1f4..f072756ee2 100644 --- a/dom/locales/en-US/chrome/security/caps.properties +++ b/dom/locales/en-US/chrome/security/caps.properties @@ -4,6 +4,7 @@ CheckMessage = Remember this decision CheckLoadURIError = Security Error: Content at %S may not load or link to %S. CheckSameOriginError = Security Error: Content at %S may not load data from %S. 
+ExternalDataError = Security Error: Content at %S attempted to load %S, but may not load external data when being used as an image. # LOCALIZATION NOTE (GetPropertyDeniedOrigins): # %1$S is the origin of the script which was denied access. diff --git a/dom/webidl/DOMCursor.webidl b/dom/webidl/DOMCursor.webidl index 3ad36a76fa..f5d0447ea8 100644 --- a/dom/webidl/DOMCursor.webidl +++ b/dom/webidl/DOMCursor.webidl @@ -3,6 +3,7 @@ * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ +[Exposed=(Window,Worker)] interface DOMCursor : EventTarget { readonly attribute boolean done; [Throws] diff --git a/dom/webidl/DOMRequest.webidl b/dom/webidl/DOMRequest.webidl index 53686ab113..74a656d661 100644 --- a/dom/webidl/DOMRequest.webidl +++ b/dom/webidl/DOMRequest.webidl @@ -5,7 +5,7 @@ enum DOMRequestReadyState { "pending", "done" }; -[NoInterfaceObject] +[Exposed=(Window,Worker), NoInterfaceObject] interface DOMRequestShared { readonly attribute DOMRequestReadyState readyState; @@ -16,6 +16,7 @@ interface DOMRequestShared { attribute EventHandler onerror; }; +[Exposed=(Window,Worker)] interface DOMRequest : EventTarget { // The [TreatNonCallableAsNull] annotation is required since then() should do // nothing instead of throwing errors when non-callable arguments are passed. diff --git a/dom/workers/test/serviceworkers/test_serviceworker_interfaces.js b/dom/workers/test/serviceworkers/test_serviceworker_interfaces.js index be5e74d716..669dacc6c3 100644 --- a/dom/workers/test/serviceworkers/test_serviceworker_interfaces.js +++ b/dom/workers/test/serviceworkers/test_serviceworker_interfaces.js @@ -101,10 +101,14 @@ var interfaceNamesInGlobalScope = { name: "DataStore", b2g: true }, // IMPORTANT: Do not change this list without review from a DOM peer! { name: "DataStoreCursor", b2g: true }, +// IMPORTANT: Do not change this list without review from a DOM peer! 
+ "DOMCursor", // IMPORTANT: Do not change this list without review from a DOM peer! "DOMError", // IMPORTANT: Do not change this list without review from a DOM peer! "DOMException", +// IMPORTANT: Do not change this list without review from a DOM peer! + "DOMRequest", // IMPORTANT: Do not change this list without review from a DOM peer! "DOMStringList", // IMPORTANT: Do not change this list without review from a DOM peer! diff --git a/dom/workers/test/test_worker_interfaces.js b/dom/workers/test/test_worker_interfaces.js index 45fe833f9b..f59ab99b53 100644 --- a/dom/workers/test/test_worker_interfaces.js +++ b/dom/workers/test/test_worker_interfaces.js @@ -99,10 +99,14 @@ var interfaceNamesInGlobalScope = { name: "DataStore", b2g: true }, // IMPORTANT: Do not change this list without review from a DOM peer! { name: "DataStoreCursor", b2g: true }, +// IMPORTANT: Do not change this list without review from a DOM peer! + "DOMCursor", // IMPORTANT: Do not change this list without review from a DOM peer! "DOMError", // IMPORTANT: Do not change this list without review from a DOM peer! "DOMException", +// IMPORTANT: Do not change this list without review from a DOM peer! + "DOMRequest", // IMPORTANT: Do not change this list without review from a DOM peer! "DOMStringList", // IMPORTANT: Do not change this list without review from a DOM peer! 
diff --git a/modules/libpref/Preferences.cpp b/modules/libpref/Preferences.cpp index 18a1efee17..673ede3699 100644 --- a/modules/libpref/Preferences.cpp +++ b/modules/libpref/Preferences.cpp @@ -517,8 +517,7 @@ Preferences::Init() { nsresult rv; - rv = PREF_Init(); - NS_ENSURE_SUCCESS(rv, rv); + PREF_Init(); rv = pref_InitInitialObjects(); NS_ENSURE_SUCCESS(rv, rv); @@ -638,8 +637,7 @@ Preferences::ResetPrefs() NotifyServiceObservers(NS_PREFSERVICE_RESET_TOPIC_ID); PREF_CleanupPrefs(); - nsresult rv = PREF_Init(); - NS_ENSURE_SUCCESS(rv, rv); + PREF_Init(); return pref_InitInitialObjects(); } diff --git a/modules/libpref/nsPrefBranch.cpp b/modules/libpref/nsPrefBranch.cpp index d7beaee91a..66ef79cced 100644 --- a/modules/libpref/nsPrefBranch.cpp +++ b/modules/libpref/nsPrefBranch.cpp @@ -25,6 +25,10 @@ #include "prefapi_private_data.h" +#ifdef MOZ_CRASHREPORTER +#include "nsICrashReporter.h" +#endif + #include "nsIConsoleService.h" #ifdef DEBUG @@ -316,6 +320,16 @@ NS_IMETHODIMP nsPrefBranch::GetComplexValue(const char *aPrefName, const nsIID & // some addons, see bug 836263. nsAutoString wdata; if (!AppendUTF8toUTF16(utf8String, wdata, mozilla::fallible)) { +#ifdef MOZ_CRASHREPORTER + nsCOMPtr cr = + do_GetService("@mozilla.org/toolkit/crash-reporter;1"); + if (cr) { + cr->AnnotateCrashReport(NS_LITERAL_CSTRING("bug836263-size"), + nsPrintfCString("%x", utf8String.Length())); + cr->RegisterAppMemory(uint64_t(utf8String.BeginReading()), + std::min(0x1000U, utf8String.Length())); + } +#endif NS_RUNTIMEABORT("bug836263"); } theString->SetData(wdata); @@ -353,7 +367,7 @@ nsresult nsPrefBranch::CheckSanityOfStringLength(const char* aPrefName, const ui } nsAutoCString message(nsPrintfCString("Warning: attempting to write %d bytes to preference %s. This is bad for general performance and memory usage. 
Such an amount of data should rather be written to an external file.", aLength, - aPrefName)); + getPrefName(aPrefName))); rv = console->LogStringMessage(NS_ConvertUTF8toUTF16(message).get()); if (NS_FAILED(rv)) { return rv; diff --git a/modules/libpref/prefapi.cpp b/modules/libpref/prefapi.cpp index 721a6afcc3..8c06434a2c 100644 --- a/modules/libpref/prefapi.cpp +++ b/modules/libpref/prefapi.cpp @@ -147,7 +147,7 @@ static nsresult pref_HashPref(const char *key, PrefValue value, PrefType type, u #define PREF_HASHTABLE_INITIAL_LENGTH 1024 -nsresult PREF_Init() +void PREF_Init() { if (!gHashTable) { gHashTable = new PLDHashTable(&pref_HashTableOps, @@ -157,7 +157,6 @@ nsresult PREF_Init() PL_INIT_ARENA_POOL(&gPrefNameArena, "PrefNameArena", PREFNAME_ARENA_SIZE); } - return NS_OK; } /* Frees the callback list. */ diff --git a/modules/libpref/prefapi.h b/modules/libpref/prefapi.h index f1e412a678..b8bd1da3dc 100644 --- a/modules/libpref/prefapi.h +++ b/modules/libpref/prefapi.h @@ -42,7 +42,7 @@ struct PrefHashEntry : PLDHashEntryHdr // the preference hashtable. // */ -nsresult PREF_Init(); +void PREF_Init(); /* // Cleanup should be called at program exit to free the diff --git a/parser/html/jArray.h b/parser/html/jArray.h index 4bd7dcd089..3a7f488d88 100644 --- a/parser/html/jArray.h +++ b/parser/html/jArray.h @@ -53,6 +53,12 @@ struct jArray { jArray newArray = { new T[len], len }; return newArray; } + static jArray newFallibleJArray(L const len) { + MOZ_ASSERT(len >= 0, "Negative length."); + T* a = new (mozilla::fallible) T[len]; + jArray newArray = { a, a ? 
len : 0 }; + return newArray; + } operator T*() { return arr; } T& operator[] (L const index) { MOZ_ASSERT(index >= 0, "Array access with negative index."); diff --git a/parser/html/javasrc/Tokenizer.java b/parser/html/javasrc/Tokenizer.java index 997a58f8f5..1c6d150712 100644 --- a/parser/html/javasrc/Tokenizer.java +++ b/parser/html/javasrc/Tokenizer.java @@ -53,7 +53,7 @@ import org.xml.sax.SAXParseException; /** * An implementation of - * http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html + * https://html.spec.whatwg.org/multipage/syntax.html#tokenization * * This class implements the Locator interface. This is not an * incidental implementation detail: Users of this class are encouraged to make @@ -262,11 +262,6 @@ public class Tokenizer implements Locator { */ private static final @NoLength char[] LF = { '\n' }; - /** - * Buffer growth parameter. - */ - private static final int BUFFER_GROW_BY = 1024; - /** * "CDATA[" as char[] */ @@ -355,9 +350,7 @@ public class Tokenizer implements Locator { private int candidate; - private int strBufMark; - - private int prevValue; + private int charRefBufMark; protected int value; @@ -378,7 +371,8 @@ public class Tokenizer implements Locator { private String systemId; /** - * Buffer for short identifiers. + * Buffer for bufferable things other than those that fit the description + * of charRefBuf. */ private @Auto char[] strBuf; @@ -388,25 +382,15 @@ public class Tokenizer implements Locator { private int strBufLen; /** - * -1 to indicate that strBuf is used or otherwise - * an offset to the main buffer. + * Buffer for characters that might form a character reference but may + * end up not forming one. */ - // private int strBufOffset = -1; - /** - * Buffer for long strings. - */ - private @Auto char[] longStrBuf; + private final @Auto char[] charRefBuf; /** - * Number of significant chars in longStrBuf. + * Number of significant chars in charRefBuf. 
*/ - private int longStrBufLen; - - /** - * -1 to indicate that longStrBuf is used or - * otherwise an offset to the main buffer. - */ - // private int longStrBufOffset = -1; + private int charRefBufLen; /** * Buffer for expanding NCRs falling into the Basic Multilingual Plane. @@ -453,7 +437,7 @@ public class Tokenizer implements Locator { protected boolean html4; /** - * Whether the stream is past the first 512 bytes. + * Whether the stream is past the first 1024 bytes. */ private boolean metaBoundaryPassed; @@ -521,6 +505,9 @@ public class Tokenizer implements Locator { this.tokenHandler = tokenHandler; this.encodingDeclarationHandler = null; this.newAttributesEachTime = newAttributesEachTime; + // &CounterClockwiseContourIntegral; is the longest valid char ref and + // the semicolon never gets appended to the buffer. + this.charRefBuf = new char[32]; this.bmpChar = new char[1]; this.astralChar = new char[2]; this.tagName = null; @@ -547,6 +534,9 @@ public class Tokenizer implements Locator { // [NOCPP[ this.newAttributesEachTime = false; // ]NOCPP] + // &CounterClockwiseContourIntegral; is the longest valid char ref and + // the semicolon never gets appended to the buffer. + this.charRefBuf = new char[32]; this.bmpChar = new char[1]; this.astralChar = new char[2]; this.tagName = null; @@ -818,6 +808,25 @@ public class Tokenizer implements Locator { // ]NOCPP] } + @Inline private void appendCharRefBuf(char c) { + charRefBuf[charRefBufLen++] = c; + } + + @Inline private void clearCharRefBufAndAppend(char c) { + charRefBuf[0] = c; + charRefBufLen = 1; + } + + private void emitOrAppendCharRefBuf(int returnState) throws SAXException { + if ((returnState & DATA_AND_RCDATA_MASK) != 0) { + appendCharRefBufToStrBuf(); + } else { + if (charRefBufLen > 0) { + tokenHandler.characters(charRefBuf, 0, charRefBufLen); + } + } + } + @Inline private void clearStrBufAndAppend(char c) { strBuf[0] = c; strBufLen = 1; @@ -828,37 +837,32 @@ public class Tokenizer implements Locator { } /** - * Appends to the smaller buffer. 
+ * Appends to the buffer. * * @param c * the UTF-16 code unit to append */ private void appendStrBuf(char c) { - if (strBufLen == strBuf.length) { - char[] newBuf = new char[strBuf.length + Tokenizer.BUFFER_GROW_BY]; - System.arraycopy(strBuf, 0, newBuf, 0, strBuf.length); - strBuf = newBuf; - } strBuf[strBufLen++] = c; } /** - * The smaller buffer as a String. Currently only used for error reporting. + * The buffer as a String. Currently only used for error reporting. * *
<p>
* C++ memory note: The return value must be released. * - * @return the smaller buffer as a string + * @return the buffer as a string */ protected String strBufToString() { return Portability.newStringFromBuffer(strBuf, 0, strBufLen); } /** - * Returns the short buffer as a local name. The return value is released in + * Returns the buffer as a local name. The return value is released in * emitDoctypeToken(). * - * @return the smaller buffer as local name + * @return the buffer as local name */ private void strBufToDoctypeName() { doctypeName = Portability.newLocalNameFromBuffer(strBuf, 0, strBufLen, @@ -866,7 +870,7 @@ public class Tokenizer implements Locator { } /** - * Emits the smaller buffer as character tokens. + * Emits the buffer as character tokens. * * @throws SAXException * if the token handler threw @@ -877,41 +881,16 @@ public class Tokenizer implements Locator { } } - @Inline private void clearLongStrBuf() { - longStrBufLen = 0; - } - - @Inline private void clearLongStrBufAndAppend(char c) { - longStrBuf[0] = c; - longStrBufLen = 1; - } - - /** - * Appends to the larger buffer. 
- * - * @param c - * the UTF-16 code unit to append - */ - private void appendLongStrBuf(char c) { - if (longStrBufLen == longStrBuf.length) { - char[] newBuf = new char[longStrBufLen + (longStrBufLen >> 1)]; - System.arraycopy(longStrBuf, 0, newBuf, 0, longStrBuf.length); - longStrBuf = newBuf; - } - longStrBuf[longStrBufLen++] = c; - } - @Inline private void appendSecondHyphenToBogusComment() throws SAXException { // [NOCPP[ switch (commentPolicy) { case ALTER_INFOSET: - // detachLongStrBuf(); - appendLongStrBuf(' '); + appendStrBuf(' '); // FALLTHROUGH case ALLOW: warn("The document is not mappable to XML 1.0 due to two consecutive hyphens in a comment."); // ]NOCPP] - appendLongStrBuf('-'); + appendStrBuf('-'); // [NOCPP[ break; case FATAL: @@ -925,8 +904,7 @@ public class Tokenizer implements Locator { private void maybeAppendSpaceToBogusComment() throws SAXException { switch (commentPolicy) { case ALTER_INFOSET: - // detachLongStrBuf(); - appendLongStrBuf(' '); + appendStrBuf(' '); // FALLTHROUGH case ALLOW: warn("The document is not mappable to XML 1.0 due to a trailing hyphen in a comment."); @@ -939,21 +917,20 @@ public class Tokenizer implements Locator { // ]NOCPP] - @Inline private void adjustDoubleHyphenAndAppendToLongStrBufAndErr(char c) + @Inline private void adjustDoubleHyphenAndAppendToStrBufAndErr(char c) throws SAXException { errConsecutiveHyphens(); // [NOCPP[ switch (commentPolicy) { case ALTER_INFOSET: - // detachLongStrBuf(); - longStrBufLen--; - appendLongStrBuf(' '); - appendLongStrBuf('-'); + strBufLen--; + appendStrBuf(' '); + appendStrBuf('-'); // FALLTHROUGH case ALLOW: warn("The document is not mappable to XML 1.0 due to two consecutive hyphens in a comment."); // ]NOCPP] - appendLongStrBuf(c); + appendStrBuf(c); // [NOCPP[ break; case FATAL: @@ -963,34 +940,22 @@ public class Tokenizer implements Locator { // ]NOCPP] } - private void appendLongStrBuf(@NoLength char[] buffer, int offset, int length) { - int reqLen = longStrBufLen + 
length; - if (longStrBuf.length < reqLen) { + private void appendStrBuf(@NoLength char[] buffer, int offset, int length) { + int reqLen = strBufLen + length; + if (strBuf.length < reqLen) { char[] newBuf = new char[reqLen + (reqLen >> 1)]; - System.arraycopy(longStrBuf, 0, newBuf, 0, longStrBuf.length); - longStrBuf = newBuf; + System.arraycopy(strBuf, 0, newBuf, 0, strBuf.length); + strBuf = newBuf; } - System.arraycopy(buffer, offset, longStrBuf, longStrBufLen, length); - longStrBufLen = reqLen; + System.arraycopy(buffer, offset, strBuf, strBufLen, length); + strBufLen = reqLen; } /** - * Append the contents of the smaller buffer to the larger one. + * Append the contents of the char reference buffer to the main one. */ - @Inline private void appendStrBufToLongStrBuf() { - appendLongStrBuf(strBuf, 0, strBufLen); - } - - /** - * The larger buffer as a string. - * - *
<p>
- * C++ memory note: The return value must be released. - * - * @return the larger buffer as a string - */ - private String longStrBufToString() { - return Portability.newStringFromBuffer(longStrBuf, 0, longStrBufLen); + @Inline private void appendCharRefBufToStrBuf() { + appendStrBuf(charRefBuf, 0, charRefBufLen); } /** @@ -1006,13 +971,8 @@ public class Tokenizer implements Locator { // [NOCPP[ if (wantsComments) { // ]NOCPP] - // if (longStrBufOffset != -1) { - // tokenHandler.comment(buf, longStrBufOffset, longStrBufLen - // - provisionalHyphens); - // } else { - tokenHandler.comment(longStrBuf, 0, longStrBufLen + tokenHandler.comment(strBuf, 0, strBufLen - provisionalHyphens); - // } // [NOCPP[ } // ]NOCPP] @@ -1101,12 +1061,8 @@ public class Tokenizer implements Locator { } private void strBufToElementNameString() { - // if (strBufOffset != -1) { - // return ElementName.elementNameByBuffer(buf, strBufOffset, strBufLen); - // } else { tagName = ElementName.elementNameByBuffer(strBuf, 0, strBufLen, interner); - // } } private int emitCurrentTagToken(boolean selfClosing, int pos) @@ -1153,16 +1109,11 @@ public class Tokenizer implements Locator { } private void attributeNameComplete() throws SAXException { - // if (strBufOffset != -1) { - // attributeName = AttributeName.nameByBuffer(buf, strBufOffset, - // strBufLen, namePolicy != XmlViolationPolicy.ALLOW); - // } else { attributeName = AttributeName.nameByBuffer(strBuf, 0, strBufLen // [NOCPP[ , namePolicy != XmlViolationPolicy.ALLOW // ]NOCPP] , interner); - // } if (attributes == null) { attributes = new HtmlAttributes(mappingLangToXmlLang); @@ -1239,7 +1190,7 @@ public class Tokenizer implements Locator { } // ]NOCPP] if (attributeName != null) { - String val = longStrBufToString(); // Ownership transferred to + String val = strBufToString(); // Ownership transferred to // HtmlAttributes // CPPONLY: if (mViewSource) { // CPPONLY: mViewSource.MaybeLinkifyAttributeValue(attributeName, val); @@ -1299,6 
+1250,13 @@ public class Tokenizer implements Locator { lastCR = false; int start = buffer.getStart(); + int end = buffer.getEnd(); + + // In C++, the caller of tokenizeBuffer needs to do this explicitly. + // [NOCPP[ + ensureBufferSpace(end - start); + // ]NOCPP] + /** * The index of the last char read from buf. */ @@ -1349,9 +1307,9 @@ public class Tokenizer implements Locator { // CPPONLY: } // [NOCPP[ pos = stateLoop(state, c, pos, buffer.getBuffer(), false, returnState, - buffer.getEnd()); + end); // ]NOCPP] - if (pos == buffer.getEnd()) { + if (pos == end) { // exiting due to end of buffer buffer.setStart(pos); } else { @@ -1360,6 +1318,32 @@ public class Tokenizer implements Locator { return lastCR; } + // [NOCPP[ + private void ensureBufferSpace(int inputLength) throws SAXException { + // Add 2 to account for emissions of LT_GT, LT_SOLIDUS and RSQB_RSQB. + // Adding to the general worst case instead of only the + // TreeBuilder-exposed worst case to avoid re-introducing a bug when + // unifying the tokenizer and tree builder buffers in the future. + int worstCase = strBufLen + inputLength + charRefBufLen + 2; + tokenHandler.ensureBufferSpace(worstCase); + if (strBuf == null) { + // Add an arbitrary small value to avoid immediate reallocation + // once there are a few characters in the buffer. + strBuf = new char[worstCase + 128]; + } else if (worstCase > strBuf.length) { + // HotSpot reportedly allocates memory with 8-byte accuracy, so + // there's no point in trying to do math here to avoid slop. + // Maybe we should add some small constant to worstCase here + // but not doing that without profiling. In C++ with jemalloc, + // the corresponding method should do math to round up here + // to avoid slop. 
+ char[] newBuf = new char[worstCase]; + System.arraycopy(strBuf, 0, newBuf, 0, strBufLen); + strBuf = newBuf; + } + } + // ]NOCPP] + @SuppressWarnings("unused") private int stateLoop(int state, char c, int pos, @NoLength char[] buf, boolean reconsume, int returnState, int endPos) throws SAXException { @@ -1450,7 +1434,7 @@ public class Tokenizer implements Locator { * reference in data state. */ flushChars(buf, pos); - clearStrBufAndAppend(c); + clearCharRefBufAndAppend(c); setAdditionalAndRememberAmpersandLocation('\u0000'); returnState = state; state = transition(state, Tokenizer.CONSUME_CHARACTER_REFERENCE, reconsume, pos); @@ -1569,7 +1553,7 @@ public class Tokenizer implements Locator { /* * Switch to the bogus comment state. */ - clearLongStrBufAndAppend(c); + clearStrBufAndAppend(c); state = transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos); continue stateloop; case '>': @@ -1916,7 +1900,7 @@ public class Tokenizer implements Locator { * U+0022 QUOTATION MARK (") Switch to the * attribute value (double-quoted) state. */ - clearLongStrBuf(); + clearStrBuf(); state = transition(state, Tokenizer.ATTRIBUTE_VALUE_DOUBLE_QUOTED, reconsume, pos); break beforeattributevalueloop; // continue stateloop; @@ -1926,7 +1910,7 @@ public class Tokenizer implements Locator { * value (unquoted) state and reconsume this * input character. */ - clearLongStrBuf(); + clearStrBuf(); reconsume = true; state = transition(state, Tokenizer.ATTRIBUTE_VALUE_UNQUOTED, reconsume, pos); noteUnquotedAttributeValue(); @@ -1936,7 +1920,7 @@ public class Tokenizer implements Locator { * U+0027 APOSTROPHE (') Switch to the attribute * value (single-quoted) state. */ - clearLongStrBuf(); + clearStrBuf(); state = transition(state, Tokenizer.ATTRIBUTE_VALUE_SINGLE_QUOTED, reconsume, pos); continue stateloop; case '>': @@ -1979,7 +1963,7 @@ public class Tokenizer implements Locator { * Anything else Append the current input * character to the current attribute's value. 
*/ - clearLongStrBufAndAppend(c); + clearStrBufAndAppend(c); /* * Switch to the attribute value (unquoted) * state. @@ -2022,16 +2006,16 @@ public class Tokenizer implements Locator { * additional allowed character being U+0022 * QUOTATION MARK ("). */ - clearStrBufAndAppend(c); + clearCharRefBufAndAppend(c); setAdditionalAndRememberAmpersandLocation('\"'); returnState = state; state = transition(state, Tokenizer.CONSUME_CHARACTER_REFERENCE, reconsume, pos); continue stateloop; case '\r': - appendLongStrBufCarriageReturn(); + appendStrBufCarriageReturn(); break stateloop; case '\n': - appendLongStrBufLineFeed(); + appendStrBufLineFeed(); continue; case '\u0000': c = '\uFFFD'; @@ -2041,7 +2025,7 @@ public class Tokenizer implements Locator { * Anything else Append the current input * character to the current attribute's value. */ - appendLongStrBuf(c); + appendStrBuf(c); /* * Stay in the attribute value (double-quoted) * state. @@ -2191,7 +2175,7 @@ public class Tokenizer implements Locator { * additional allowed character being U+003E * GREATER-THAN SIGN (>) */ - clearStrBufAndAppend(c); + clearCharRefBufAndAppend(c); setAdditionalAndRememberAmpersandLocation('>'); returnState = state; state = transition(state, Tokenizer.CONSUME_CHARACTER_REFERENCE, reconsume, pos); @@ -2237,7 +2221,7 @@ public class Tokenizer implements Locator { * Anything else Append the current input * character to the current attribute's value. */ - appendLongStrBuf(c); + appendStrBuf(c); /* * Stay in the attribute value (unquoted) state. 
*/ @@ -2376,19 +2360,19 @@ public class Tokenizer implements Locator { */ switch (c) { case '-': - clearLongStrBufAndAppend(c); + clearStrBufAndAppend(c); state = transition(state, Tokenizer.MARKUP_DECLARATION_HYPHEN, reconsume, pos); break markupdeclarationopenloop; // continue stateloop; case 'd': case 'D': - clearLongStrBufAndAppend(c); + clearStrBufAndAppend(c); index = 0; state = transition(state, Tokenizer.MARKUP_DECLARATION_OCTYPE, reconsume, pos); continue stateloop; case '[': if (tokenHandler.cdataSectionAllowed()) { - clearLongStrBufAndAppend(c); + clearStrBufAndAppend(c); index = 0; state = transition(state, Tokenizer.CDATA_START, reconsume, pos); continue stateloop; @@ -2396,7 +2380,7 @@ public class Tokenizer implements Locator { // else fall through default: errBogusComment(); - clearLongStrBuf(); + clearStrBuf(); reconsume = true; state = transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos); continue stateloop; @@ -2413,7 +2397,7 @@ public class Tokenizer implements Locator { case '\u0000': break stateloop; case '-': - clearLongStrBuf(); + clearStrBuf(); state = transition(state, Tokenizer.COMMENT_START, reconsume, pos); break markupdeclarationhyphenloop; // continue stateloop; @@ -2443,7 +2427,7 @@ public class Tokenizer implements Locator { * U+002D HYPHEN-MINUS (-) Switch to the comment * start dash state. 
*/ - appendLongStrBuf(c); + appendStrBuf(c); state = transition(state, Tokenizer.COMMENT_START_DASH, reconsume, pos); continue stateloop; case '>': @@ -2459,11 +2443,11 @@ public class Tokenizer implements Locator { state = transition(state, Tokenizer.DATA, reconsume, pos); continue stateloop; case '\r': - appendLongStrBufCarriageReturn(); + appendStrBufCarriageReturn(); state = transition(state, Tokenizer.COMMENT, reconsume, pos); break stateloop; case '\n': - appendLongStrBufLineFeed(); + appendStrBufLineFeed(); state = transition(state, Tokenizer.COMMENT, reconsume, pos); break commentstartloop; case '\u0000': @@ -2474,7 +2458,7 @@ public class Tokenizer implements Locator { * Anything else Append the input character to * the comment token's data. */ - appendLongStrBuf(c); + appendStrBuf(c); /* * Switch to the comment state. */ @@ -2499,15 +2483,15 @@ public class Tokenizer implements Locator { * U+002D HYPHEN-MINUS (-) Switch to the comment * end dash state */ - appendLongStrBuf(c); + appendStrBuf(c); state = transition(state, Tokenizer.COMMENT_END_DASH, reconsume, pos); break commentloop; // continue stateloop; case '\r': - appendLongStrBufCarriageReturn(); + appendStrBufCarriageReturn(); break stateloop; case '\n': - appendLongStrBufLineFeed(); + appendStrBufLineFeed(); continue; case '\u0000': c = '\uFFFD'; @@ -2517,7 +2501,7 @@ public class Tokenizer implements Locator { * Anything else Append the input character to * the comment token's data. */ - appendLongStrBuf(c); + appendStrBuf(c); /* * Stay in the comment state. 
*/ @@ -2541,16 +2525,16 @@ public class Tokenizer implements Locator { * U+002D HYPHEN-MINUS (-) Switch to the comment * end state */ - appendLongStrBuf(c); + appendStrBuf(c); state = transition(state, Tokenizer.COMMENT_END, reconsume, pos); break commentenddashloop; // continue stateloop; case '\r': - appendLongStrBufCarriageReturn(); + appendStrBufCarriageReturn(); state = transition(state, Tokenizer.COMMENT, reconsume, pos); break stateloop; case '\n': - appendLongStrBufLineFeed(); + appendStrBufLineFeed(); state = transition(state, Tokenizer.COMMENT, reconsume, pos); continue stateloop; case '\u0000': @@ -2562,7 +2546,7 @@ public class Tokenizer implements Locator { * (-) character and the input character to the * comment token's data. */ - appendLongStrBuf(c); + appendStrBuf(c); /* * Switch to the comment state. */ @@ -2599,22 +2583,22 @@ public class Tokenizer implements Locator { * Append a U+002D HYPHEN-MINUS (-) character to * the comment token's data. */ - adjustDoubleHyphenAndAppendToLongStrBufAndErr(c); + adjustDoubleHyphenAndAppendToStrBufAndErr(c); /* * Stay in the comment end state. */ continue; case '\r': - adjustDoubleHyphenAndAppendToLongStrBufCarriageReturn(); + adjustDoubleHyphenAndAppendToStrBufCarriageReturn(); state = transition(state, Tokenizer.COMMENT, reconsume, pos); break stateloop; case '\n': - adjustDoubleHyphenAndAppendToLongStrBufLineFeed(); + adjustDoubleHyphenAndAppendToStrBufLineFeed(); state = transition(state, Tokenizer.COMMENT, reconsume, pos); continue stateloop; case '!': errHyphenHyphenBang(); - appendLongStrBuf(c); + appendStrBuf(c); state = transition(state, Tokenizer.COMMENT_END_BANG, reconsume, pos); continue stateloop; case '\u0000': @@ -2626,7 +2610,7 @@ public class Tokenizer implements Locator { * and the input character to the comment * token's data. */ - adjustDoubleHyphenAndAppendToLongStrBufAndErr(c); + adjustDoubleHyphenAndAppendToStrBufAndErr(c); /* * Switch to the comment state. 
*/ @@ -2664,17 +2648,17 @@ public class Tokenizer implements Locator { * and a U+0021 EXCLAMATION MARK (!) character * to the comment token's data. */ - appendLongStrBuf(c); + appendStrBuf(c); /* * Switch to the comment end dash state. */ state = transition(state, Tokenizer.COMMENT_END_DASH, reconsume, pos); continue stateloop; case '\r': - appendLongStrBufCarriageReturn(); + appendStrBufCarriageReturn(); break stateloop; case '\n': - appendLongStrBufLineFeed(); + appendStrBufLineFeed(); continue; case '\u0000': c = '\uFFFD'; @@ -2687,7 +2671,7 @@ public class Tokenizer implements Locator { * comment token's data. Switch to the comment * state. */ - appendLongStrBuf(c); + appendStrBuf(c); /* * Switch to the comment state. */ @@ -2712,7 +2696,7 @@ public class Tokenizer implements Locator { * U+002D HYPHEN-MINUS (-) Switch to the comment end * state */ - appendLongStrBuf(c); + appendStrBuf(c); state = transition(state, Tokenizer.COMMENT_END, reconsume, pos); continue stateloop; case '>': @@ -2725,11 +2709,11 @@ public class Tokenizer implements Locator { state = transition(state, Tokenizer.DATA, reconsume, pos); continue stateloop; case '\r': - appendLongStrBufCarriageReturn(); + appendStrBufCarriageReturn(); state = transition(state, Tokenizer.COMMENT, reconsume, pos); break stateloop; case '\n': - appendLongStrBufLineFeed(); + appendStrBufLineFeed(); state = transition(state, Tokenizer.COMMENT, reconsume, pos); continue stateloop; case '\u0000': @@ -2741,7 +2725,7 @@ public class Tokenizer implements Locator { * the current input character to the comment * token's data. */ - appendLongStrBuf(c); + appendStrBuf(c); /* * Switch to the comment state. 
*/ @@ -2757,7 +2741,7 @@ public class Tokenizer implements Locator { c = checkChar(buf, pos); if (index < 6) { // CDATA_LSQB.length if (c == Tokenizer.CDATA_LSQB[index]) { - appendLongStrBuf(c); + appendStrBuf(c); } else { errBogusComment(); reconsume = true; @@ -2880,17 +2864,17 @@ public class Tokenizer implements Locator { * + additional allowed character being U+0027 * APOSTROPHE ('). */ - clearStrBufAndAppend(c); + clearCharRefBufAndAppend(c); setAdditionalAndRememberAmpersandLocation('\''); returnState = state; state = transition(state, Tokenizer.CONSUME_CHARACTER_REFERENCE, reconsume, pos); break attributevaluesinglequotedloop; // continue stateloop; case '\r': - appendLongStrBufCarriageReturn(); + appendStrBufCarriageReturn(); break stateloop; case '\n': - appendLongStrBufLineFeed(); + appendStrBufLineFeed(); continue; case '\u0000': c = '\uFFFD'; @@ -2900,7 +2884,7 @@ public class Tokenizer implements Locator { * Anything else Append the current input * character to the current attribute's value. */ - appendLongStrBuf(c); + appendStrBuf(c); /* * Stay in the attribute value (double-quoted) * state. @@ -2942,7 +2926,7 @@ public class Tokenizer implements Locator { case '\u000C': case '<': case '&': - emitOrAppendStrBuf(returnState); + emitOrAppendCharRefBuf(returnState); if ((returnState & DATA_AND_RCDATA_MASK) == 0) { cstart = pos; } @@ -2954,12 +2938,12 @@ public class Tokenizer implements Locator { * U+0023 NUMBER SIGN (#) Consume the U+0023 NUMBER * SIGN. */ - appendStrBuf('#'); + appendCharRefBuf('#'); state = transition(state, Tokenizer.CONSUME_NCR, reconsume, pos); continue stateloop; default: if (c == additional) { - emitOrAppendStrBuf(returnState); + emitOrAppendCharRefBuf(returnState); reconsume = true; state = transition(state, returnState, reconsume, pos); continue stateloop; @@ -2975,7 +2959,7 @@ public class Tokenizer implements Locator { * error. 
*/ errNoNamedCharacterMatch(); - emitOrAppendStrBuf(returnState); + emitOrAppendCharRefBuf(returnState); if ((returnState & DATA_AND_RCDATA_MASK) == 0) { cstart = pos; } @@ -2984,7 +2968,7 @@ public class Tokenizer implements Locator { continue stateloop; } // Didn't fail yet - appendStrBuf(c); + appendCharRefBuf(c); state = transition(state, Tokenizer.CHARACTER_REFERENCE_HILO_LOOKUP, reconsume, pos); // FALL THROUGH continue stateloop; } @@ -3049,7 +3033,7 @@ public class Tokenizer implements Locator { * error. */ errNoNamedCharacterMatch(); - emitOrAppendStrBuf(returnState); + emitOrAppendCharRefBuf(returnState); if ((returnState & DATA_AND_RCDATA_MASK) == 0) { cstart = pos; } @@ -3058,12 +3042,12 @@ public class Tokenizer implements Locator { continue stateloop; } // Didn't fail yet - appendStrBuf(c); + appendCharRefBuf(c); lo = hilo & 0xFFFF; hi = hilo >> 16; entCol = -1; candidate = -1; - strBufMark = 0; + charRefBufMark = 0; state = transition(state, Tokenizer.CHARACTER_REFERENCE_TAIL, reconsume, pos); // FALL THROUGH continue stateloop; } @@ -3090,7 +3074,7 @@ public class Tokenizer implements Locator { } if (entCol == NamedCharacters.NAMES[lo].length()) { candidate = lo; - strBufMark = strBufLen; + charRefBufMark = charRefBufLen; lo++; } else if (entCol > NamedCharacters.NAMES[lo].length()) { break outer; @@ -3125,7 +3109,7 @@ public class Tokenizer implements Locator { // match. if (entCol + 1 == NamedCharacters.NAMES[lo].length()) { candidate = lo; - strBufMark = strBufLen; + charRefBufMark = charRefBufLen; } break outer; } @@ -3133,7 +3117,7 @@ public class Tokenizer implements Locator { if (hi < lo) { break outer; } - appendStrBuf(c); + appendCharRefBuf(c); continue; } @@ -3143,7 +3127,7 @@ public class Tokenizer implements Locator { * If no match can be made, then this is a parse error. 
*/ errNoNamedCharacterMatch(); - emitOrAppendStrBuf(returnState); + emitOrAppendCharRefBuf(returnState); if ((returnState & DATA_AND_RCDATA_MASK) == 0) { cstart = pos; } @@ -3166,14 +3150,10 @@ public class Tokenizer implements Locator { * not a U+003B SEMICOLON (;), */ char ch; - if (strBufMark == strBufLen) { + if (charRefBufMark == charRefBufLen) { ch = c; } else { - // if (strBufOffset != -1) { - // ch = buf[strBufOffset + strBufMark]; - // } else { - ch = strBuf[strBufMark]; - // } + ch = charRefBuf[charRefBufMark]; } if (ch == '=' || (ch >= '0' && ch <= '9') || (ch >= 'A' && ch <= 'Z') @@ -3191,7 +3171,7 @@ public class Tokenizer implements Locator { * unconsumed, and nothing is returned. */ errNoNamedCharacterMatch(); - appendStrBufToLongStrBuf(); + appendCharRefBufToStrBuf(); reconsume = true; state = transition(state, returnState, reconsume, pos); continue stateloop; @@ -3223,14 +3203,13 @@ public class Tokenizer implements Locator { emitOrAppendTwo(val, returnState); } // this is so complicated! - if (strBufMark < strBufLen) { + if (charRefBufMark < charRefBufLen) { if ((returnState & DATA_AND_RCDATA_MASK) != 0) { - for (int i = strBufMark; i < strBufLen; i++) { - appendLongStrBuf(strBuf[i]); - } + appendStrBuf(charRefBuf, charRefBufMark, + charRefBufLen - charRefBufMark); } else { - tokenHandler.characters(strBuf, strBufMark, - strBufLen - strBufMark); + tokenHandler.characters(charRefBuf, charRefBufMark, + charRefBufLen - charRefBufMark); } } // Check if we broke out early with c being the last @@ -3239,7 +3218,7 @@ public class Tokenizer implements Locator { // early break, the next run on text should start // *after* the current character and the current // character shouldn't be reconsumed. - boolean earlyBreak = (c == ';' && strBufMark == strBufLen); + boolean earlyBreak = (c == ';' && charRefBufMark == charRefBufLen); if ((returnState & DATA_AND_RCDATA_MASK) == 0) { cstart = earlyBreak ? 
pos + 1 : pos; } @@ -3260,7 +3239,6 @@ public class Tokenizer implements Locator { break stateloop; } c = checkChar(buf, pos); - prevValue = -1; value = 0; seenDigits = false; /* @@ -3285,7 +3263,7 @@ public class Tokenizer implements Locator { * When it comes to interpreting the number, * interpret it as a hexadecimal number. */ - appendStrBuf(c); + appendCharRefBuf(c); state = transition(state, Tokenizer.HEX_NCR_LOOP, reconsume, pos); continue stateloop; default: @@ -3312,21 +3290,18 @@ public class Tokenizer implements Locator { } c = checkChar(buf, pos); } - // Deal with overflow gracefully - if (value < prevValue) { - value = 0x110000; // Value above Unicode range but - // within int - // range - } - prevValue = value; /* * Consume as many characters as match the range of * characters given above. */ + assert value >= 0: "value must not become negative."; if (c >= '0' && c <= '9') { seenDigits = true; - value *= 10; - value += c - '0'; + // Avoid overflow + if (value <= 0x10FFFF) { + value *= 10; + value += c - '0'; + } continue; } else if (c == ';') { if (seenDigits) { @@ -3338,8 +3313,8 @@ public class Tokenizer implements Locator { break decimalloop; } else { errNoDigitsInNCR(); - appendStrBuf(';'); - emitOrAppendStrBuf(returnState); + appendCharRefBuf(';'); + emitOrAppendCharRefBuf(returnState); if ((returnState & DATA_AND_RCDATA_MASK) == 0) { cstart = pos + 1; } @@ -3360,7 +3335,7 @@ public class Tokenizer implements Locator { */ if (!seenDigits) { errNoDigitsInNCR(); - emitOrAppendStrBuf(returnState); + emitOrAppendCharRefBuf(returnState); if ((returnState & DATA_AND_RCDATA_MASK) == 0) { cstart = pos; } @@ -3393,31 +3368,34 @@ public class Tokenizer implements Locator { break stateloop; } c = checkChar(buf, pos); - // Deal with overflow gracefully - if (value < prevValue) { - value = 0x110000; // Value above Unicode range but - // within int - // range - } - prevValue = value; /* * Consume as many characters as match the range of * characters given 
above. */ + assert value >= 0: "value must not become negative."; if (c >= '0' && c <= '9') { seenDigits = true; - value *= 16; - value += c - '0'; + // Avoid overflow + if (value <= 0x10FFFF) { + value *= 16; + value += c - '0'; + } continue; } else if (c >= 'A' && c <= 'F') { seenDigits = true; - value *= 16; - value += c - 'A' + 10; + // Avoid overflow + if (value <= 0x10FFFF) { + value *= 16; + value += c - 'A' + 10; + } continue; } else if (c >= 'a' && c <= 'f') { seenDigits = true; - value *= 16; - value += c - 'a' + 10; + // Avoid overflow + if (value <= 0x10FFFF) { + value *= 16; + value += c - 'a' + 10; + } continue; } else if (c == ';') { if (seenDigits) { @@ -3428,8 +3406,8 @@ public class Tokenizer implements Locator { continue stateloop; } else { errNoDigitsInNCR(); - appendStrBuf(';'); - emitOrAppendStrBuf(returnState); + appendCharRefBuf(';'); + emitOrAppendCharRefBuf(returnState); if ((returnState & DATA_AND_RCDATA_MASK) == 0) { cstart = pos + 1; } @@ -3450,7 +3428,7 @@ public class Tokenizer implements Locator { */ if (!seenDigits) { errNoDigitsInNCR(); - emitOrAppendStrBuf(returnState); + emitOrAppendCharRefBuf(returnState); if ((returnState & DATA_AND_RCDATA_MASK) == 0) { cstart = pos; } @@ -3525,7 +3503,7 @@ public class Tokenizer implements Locator { /* * Switch to the bogus comment state. */ - clearLongStrBufAndAppend('\n'); + clearStrBufAndAppend('\n'); state = transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos); break stateloop; case '\n': @@ -3535,7 +3513,7 @@ public class Tokenizer implements Locator { /* * Switch to the bogus comment state. */ - clearLongStrBufAndAppend('\n'); + clearStrBufAndAppend('\n'); state = transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos); continue stateloop; case '\u0000': @@ -3569,7 +3547,7 @@ public class Tokenizer implements Locator { /* * Switch to the bogus comment state. 
*/ - clearLongStrBufAndAppend(c); + clearStrBufAndAppend(c); state = transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos); continue stateloop; } @@ -3592,7 +3570,7 @@ public class Tokenizer implements Locator { * reference in RCDATA state. */ flushChars(buf, pos); - clearStrBufAndAppend(c); + clearCharRefBufAndAppend(c); setAdditionalAndRememberAmpersandLocation('\u0000'); returnState = state; state = transition(state, Tokenizer.CONSUME_CHARACTER_REFERENCE, reconsume, pos); @@ -3840,20 +3818,20 @@ public class Tokenizer implements Locator { state = transition(state, Tokenizer.DATA, reconsume, pos); continue stateloop; case '-': - appendLongStrBuf(c); + appendStrBuf(c); state = transition(state, Tokenizer.BOGUS_COMMENT_HYPHEN, reconsume, pos); break boguscommentloop; case '\r': - appendLongStrBufCarriageReturn(); + appendStrBufCarriageReturn(); break stateloop; case '\n': - appendLongStrBufLineFeed(); + appendStrBufLineFeed(); continue; case '\u0000': c = '\uFFFD'; // fall thru default: - appendLongStrBuf(c); + appendStrBuf(c); continue; } } @@ -3876,18 +3854,18 @@ public class Tokenizer implements Locator { appendSecondHyphenToBogusComment(); continue boguscommenthyphenloop; case '\r': - appendLongStrBufCarriageReturn(); + appendStrBufCarriageReturn(); state = transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos); break stateloop; case '\n': - appendLongStrBufLineFeed(); + appendStrBufLineFeed(); state = transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos); continue stateloop; case '\u0000': c = '\uFFFD'; // fall thru default: - appendLongStrBuf(c); + appendStrBuf(c); state = transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos); continue stateloop; } @@ -4552,7 +4530,7 @@ public class Tokenizer implements Locator { folded += 0x20; } if (folded == Tokenizer.OCTYPE[index]) { - appendLongStrBuf(c); + appendStrBuf(c); } else { errBogusComment(); reconsume = true; @@ -4907,7 +4885,7 @@ public class Tokenizer implements Locator { * Set the DOCTYPE 
token's public identifier to * the empty string (not missing), */ - clearLongStrBuf(); + clearStrBuf(); /* * then switch to the DOCTYPE public identifier * (double-quoted) state. @@ -4923,7 +4901,7 @@ public class Tokenizer implements Locator { * Set the DOCTYPE token's public identifier to * the empty string (not missing), */ - clearLongStrBuf(); + clearStrBuf(); /* * then switch to the DOCTYPE public identifier * (single-quoted) state. @@ -4994,7 +4972,7 @@ public class Tokenizer implements Locator { * token's public identifier to the empty string * (not missing), */ - clearLongStrBuf(); + clearStrBuf(); /* * then switch to the DOCTYPE public identifier * (double-quoted) state. @@ -5008,7 +4986,7 @@ public class Tokenizer implements Locator { * public identifier to the empty string (not * missing), */ - clearLongStrBuf(); + clearStrBuf(); /* * then switch to the DOCTYPE public identifier * (single-quoted) state. @@ -5062,7 +5040,7 @@ public class Tokenizer implements Locator { * U+0022 QUOTATION MARK (") Switch to the after * DOCTYPE public identifier state. */ - publicIdentifier = longStrBufToString(); + publicIdentifier = strBufToString(); state = transition(state, Tokenizer.AFTER_DOCTYPE_PUBLIC_IDENTIFIER, reconsume, pos); break doctypepublicidentifierdoublequotedloop; // continue stateloop; @@ -5079,7 +5057,7 @@ public class Tokenizer implements Locator { /* * Emit that DOCTYPE token. */ - publicIdentifier = longStrBufToString(); + publicIdentifier = strBufToString(); emitDoctypeToken(pos); /* * Switch to the data state. 
@@ -5087,10 +5065,10 @@ public class Tokenizer implements Locator { state = transition(state, Tokenizer.DATA, reconsume, pos); continue stateloop; case '\r': - appendLongStrBufCarriageReturn(); + appendStrBufCarriageReturn(); break stateloop; case '\n': - appendLongStrBufLineFeed(); + appendStrBufLineFeed(); continue; case '\u0000': c = '\uFFFD'; @@ -5101,7 +5079,7 @@ public class Tokenizer implements Locator { * character to the current DOCTYPE token's * public identifier. */ - appendLongStrBuf(c); + appendStrBuf(c); /* * Stay in the DOCTYPE public identifier * (double-quoted) state. @@ -5159,7 +5137,7 @@ public class Tokenizer implements Locator { * Set the DOCTYPE token's system identifier to * the empty string (not missing), */ - clearLongStrBuf(); + clearStrBuf(); /* * then switch to the DOCTYPE system identifier * (double-quoted) state. @@ -5175,7 +5153,7 @@ public class Tokenizer implements Locator { * Set the DOCTYPE token's system identifier to * the empty string (not missing), */ - clearLongStrBuf(); + clearStrBuf(); /* * then switch to the DOCTYPE system identifier * (single-quoted) state. @@ -5240,7 +5218,7 @@ public class Tokenizer implements Locator { * token's system identifier to the empty string * (not missing), */ - clearLongStrBuf(); + clearStrBuf(); /* * then switch to the DOCTYPE system identifier * (double-quoted) state. @@ -5254,7 +5232,7 @@ public class Tokenizer implements Locator { * system identifier to the empty string (not * missing), */ - clearLongStrBuf(); + clearStrBuf(); /* * then switch to the DOCTYPE system identifier * (single-quoted) state. @@ -5291,7 +5269,7 @@ public class Tokenizer implements Locator { * U+0022 QUOTATION MARK (") Switch to the after * DOCTYPE system identifier state. 
*/ - systemIdentifier = longStrBufToString(); + systemIdentifier = strBufToString(); state = transition(state, Tokenizer.AFTER_DOCTYPE_SYSTEM_IDENTIFIER, reconsume, pos); continue stateloop; case '>': @@ -5307,7 +5285,7 @@ public class Tokenizer implements Locator { /* * Emit that DOCTYPE token. */ - systemIdentifier = longStrBufToString(); + systemIdentifier = strBufToString(); emitDoctypeToken(pos); /* * Switch to the data state. @@ -5315,10 +5293,10 @@ public class Tokenizer implements Locator { state = transition(state, Tokenizer.DATA, reconsume, pos); continue stateloop; case '\r': - appendLongStrBufCarriageReturn(); + appendStrBufCarriageReturn(); break stateloop; case '\n': - appendLongStrBufLineFeed(); + appendStrBufLineFeed(); continue; case '\u0000': c = '\uFFFD'; @@ -5329,7 +5307,7 @@ public class Tokenizer implements Locator { * character to the current DOCTYPE token's * system identifier. */ - appendLongStrBuf(c); + appendStrBuf(c); /* * Stay in the DOCTYPE system identifier * (double-quoted) state. @@ -5503,7 +5481,7 @@ public class Tokenizer implements Locator { * Set the DOCTYPE token's system identifier to * the empty string (not missing), */ - clearLongStrBuf(); + clearStrBuf(); /* * then switch to the DOCTYPE public identifier * (double-quoted) state. @@ -5519,7 +5497,7 @@ public class Tokenizer implements Locator { * Set the DOCTYPE token's public identifier to * the empty string (not missing), */ - clearLongStrBuf(); + clearStrBuf(); /* * then switch to the DOCTYPE public identifier * (single-quoted) state. @@ -5590,7 +5568,7 @@ public class Tokenizer implements Locator { * token's system identifier to the empty string * (not missing), */ - clearLongStrBuf(); + clearStrBuf(); /* * then switch to the DOCTYPE system identifier * (double-quoted) state. 
@@ -5603,7 +5581,7 @@ public class Tokenizer implements Locator { * system identifier to the empty string (not * missing), */ - clearLongStrBuf(); + clearStrBuf(); /* * then switch to the DOCTYPE system identifier * (single-quoted) state. @@ -5658,7 +5636,7 @@ public class Tokenizer implements Locator { * U+0027 APOSTROPHE (') Switch to the after * DOCTYPE system identifier state. */ - systemIdentifier = longStrBufToString(); + systemIdentifier = strBufToString(); state = transition(state, Tokenizer.AFTER_DOCTYPE_SYSTEM_IDENTIFIER, reconsume, pos); continue stateloop; case '>': @@ -5671,7 +5649,7 @@ public class Tokenizer implements Locator { /* * Emit that DOCTYPE token. */ - systemIdentifier = longStrBufToString(); + systemIdentifier = strBufToString(); emitDoctypeToken(pos); /* * Switch to the data state. @@ -5679,10 +5657,10 @@ public class Tokenizer implements Locator { state = transition(state, Tokenizer.DATA, reconsume, pos); continue stateloop; case '\r': - appendLongStrBufCarriageReturn(); + appendStrBufCarriageReturn(); break stateloop; case '\n': - appendLongStrBufLineFeed(); + appendStrBufLineFeed(); continue; case '\u0000': c = '\uFFFD'; @@ -5693,7 +5671,7 @@ public class Tokenizer implements Locator { * character to the current DOCTYPE token's * system identifier. */ - appendLongStrBuf(c); + appendStrBuf(c); /* * Stay in the DOCTYPE system identifier * (double-quoted) state. @@ -5717,7 +5695,7 @@ public class Tokenizer implements Locator { * U+0027 APOSTROPHE (') Switch to the after * DOCTYPE public identifier state. */ - publicIdentifier = longStrBufToString(); + publicIdentifier = strBufToString(); state = transition(state, Tokenizer.AFTER_DOCTYPE_PUBLIC_IDENTIFIER, reconsume, pos); continue stateloop; case '>': @@ -5730,7 +5708,7 @@ public class Tokenizer implements Locator { /* * Emit that DOCTYPE token. */ - publicIdentifier = longStrBufToString(); + publicIdentifier = strBufToString(); emitDoctypeToken(pos); /* * Switch to the data state. 
@@ -5738,10 +5716,10 @@ public class Tokenizer implements Locator { state = transition(state, Tokenizer.DATA, reconsume, pos); continue stateloop; case '\r': - appendLongStrBufCarriageReturn(); + appendStrBufCarriageReturn(); break stateloop; case '\n': - appendLongStrBufLineFeed(); + appendStrBufLineFeed(); continue; case '\u0000': c = '\uFFFD'; @@ -5752,7 +5730,7 @@ public class Tokenizer implements Locator { * character to the current DOCTYPE token's * public identifier. */ - appendLongStrBuf(c); + appendStrBuf(c); /* * Stay in the DOCTYPE public identifier * (single-quoted) state. @@ -5831,26 +5809,26 @@ public class Tokenizer implements Locator { forceQuirks = false; } - @Inline private void adjustDoubleHyphenAndAppendToLongStrBufCarriageReturn() + @Inline private void adjustDoubleHyphenAndAppendToStrBufCarriageReturn() throws SAXException { silentCarriageReturn(); - adjustDoubleHyphenAndAppendToLongStrBufAndErr('\n'); + adjustDoubleHyphenAndAppendToStrBufAndErr('\n'); } - @Inline private void adjustDoubleHyphenAndAppendToLongStrBufLineFeed() + @Inline private void adjustDoubleHyphenAndAppendToStrBufLineFeed() throws SAXException { silentLineFeed(); - adjustDoubleHyphenAndAppendToLongStrBufAndErr('\n'); + adjustDoubleHyphenAndAppendToStrBufAndErr('\n'); } - @Inline private void appendLongStrBufLineFeed() { + @Inline private void appendStrBufLineFeed() { silentLineFeed(); - appendLongStrBuf('\n'); + appendStrBuf('\n'); } - @Inline private void appendLongStrBufCarriageReturn() { + @Inline private void appendStrBufCarriageReturn() { silentCarriageReturn(); - appendLongStrBuf('\n'); + appendStrBuf('\n'); } @Inline protected void silentCarriageReturn() { @@ -5901,14 +5879,6 @@ public class Tokenizer implements Locator { forceQuirks = false; } - private void emitOrAppendStrBuf(int returnState) throws SAXException { - if ((returnState & DATA_AND_RCDATA_MASK) != 0) { - appendStrBufToLongStrBuf(); - } else { - emitStrBuf(); - } - } - private void handleNcrValue(int 
returnState) throws SAXException { /* * If one or more characters match the range, then take them all and @@ -6114,7 +6084,7 @@ public class Tokenizer implements Locator { break eofloop; case MARKUP_DECLARATION_OPEN: errBogusComment(); - clearLongStrBuf(); + clearStrBuf(); emitComment(0, 0); break eofloop; case MARKUP_DECLARATION_HYPHEN: @@ -6250,7 +6220,7 @@ public class Tokenizer implements Locator { /* * Emit that DOCTYPE token. */ - publicIdentifier = longStrBufToString(); + publicIdentifier = strBufToString(); emitDoctypeToken(0); /* * Reconsume the EOF character in the data state. @@ -6283,7 +6253,7 @@ public class Tokenizer implements Locator { /* * Emit that DOCTYPE token. */ - systemIdentifier = longStrBufToString(); + systemIdentifier = strBufToString(); emitDoctypeToken(0); /* * Reconsume the EOF character in the data state. @@ -6331,12 +6301,12 @@ public class Tokenizer implements Locator { * character): */ - emitOrAppendStrBuf(returnState); + emitOrAppendCharRefBuf(returnState); state = returnState; continue; case CHARACTER_REFERENCE_HILO_LOOKUP: errNoNamedCharacterMatch(); - emitOrAppendStrBuf(returnState); + emitOrAppendCharRefBuf(returnState); state = returnState; continue; case CHARACTER_REFERENCE_TAIL: @@ -6372,7 +6342,7 @@ public class Tokenizer implements Locator { } if (entCol == NamedCharacters.NAMES[lo].length()) { candidate = lo; - strBufMark = strBufLen; + charRefBufMark = charRefBufLen; lo++; } else if (entCol > NamedCharacters.NAMES[lo].length()) { break outer; @@ -6393,7 +6363,7 @@ public class Tokenizer implements Locator { * If no match can be made, then this is a parse error. 
*/ errNoNamedCharacterMatch(); - emitOrAppendStrBuf(returnState); + emitOrAppendCharRefBuf(returnState); state = returnState; continue eofloop; } else { @@ -6411,10 +6381,10 @@ public class Tokenizer implements Locator { * not a U+003B SEMICOLON (;), */ char ch; - if (strBufMark == strBufLen) { + if (charRefBufMark == charRefBufLen) { ch = '\u0000'; } else { - ch = strBuf[strBufMark]; + ch = charRefBuf[charRefBufMark]; } if ((ch >= '0' && ch <= '9') || (ch >= 'A' && ch <= 'Z') @@ -6431,7 +6401,7 @@ public class Tokenizer implements Locator { * unconsumed, and nothing is returned. */ errNoNamedCharacterMatch(); - appendStrBufToLongStrBuf(); + appendCharRefBufToStrBuf(); state = returnState; continue eofloop; } @@ -6461,14 +6431,13 @@ public class Tokenizer implements Locator { emitOrAppendTwo(val, returnState); } // this is so complicated! - if (strBufMark < strBufLen) { + if (charRefBufMark < charRefBufLen) { if ((returnState & DATA_AND_RCDATA_MASK) != 0) { - for (int i = strBufMark; i < strBufLen; i++) { - appendLongStrBuf(strBuf[i]); - } + appendStrBuf(charRefBuf, charRefBufMark, + charRefBufLen - charRefBufMark); } else { - tokenHandler.characters(strBuf, strBufMark, - strBufLen - strBufMark); + tokenHandler.characters(charRefBuf, charRefBufMark, + charRefBufLen - charRefBufMark); } } state = returnState; @@ -6495,7 +6464,7 @@ public class Tokenizer implements Locator { */ if (!seenDigits) { errNoDigitsInNCR(); - emitOrAppendStrBuf(returnState); + emitOrAppendCharRefBuf(returnState); state = returnState; continue; } else { @@ -6558,8 +6527,8 @@ public class Tokenizer implements Locator { private void emitOrAppendTwo(@Const @NoLength char[] val, int returnState) throws SAXException { if ((returnState & DATA_AND_RCDATA_MASK) != 0) { - appendLongStrBuf(val[0]); - appendLongStrBuf(val[1]); + appendStrBuf(val[0]); + appendStrBuf(val[1]); } else { tokenHandler.characters(val, 0, 2); } @@ -6568,7 +6537,7 @@ public class Tokenizer implements Locator { private void 
emitOrAppendOne(@Const @NoLength char[] val, int returnState) throws SAXException { if ((returnState & DATA_AND_RCDATA_MASK) != 0) { - appendLongStrBuf(val[0]); + appendStrBuf(val[0]); } else { tokenHandler.characters(val, 0, 1); } @@ -6576,7 +6545,6 @@ public class Tokenizer implements Locator { public void end() throws SAXException { strBuf = null; - longStrBuf = null; doctypeName = null; if (systemIdentifier != null) { Portability.releaseString(systemIdentifier); @@ -6652,7 +6620,7 @@ public class Tokenizer implements Locator { public void resetToDataState() { strBufLen = 0; - longStrBufLen = 0; + charRefBufLen = 0; stateSave = Tokenizer.DATA; // line = 1; XXX line numbers lastCR = false; @@ -6664,8 +6632,7 @@ public class Tokenizer implements Locator { lo = 0; hi = 0; // will always be overwritten before use anyway candidate = -1; - strBufMark = 0; - prevValue = -1; + charRefBufMark = 0; value = 0; seenDigits = false; endTag = false; @@ -6694,11 +6661,8 @@ public class Tokenizer implements Locator { } System.arraycopy(other.strBuf, 0, strBuf, 0, strBufLen); - longStrBufLen = other.longStrBufLen; - if (longStrBufLen > longStrBuf.length) { - longStrBuf = new char[longStrBufLen]; - } - System.arraycopy(other.longStrBuf, 0, longStrBuf, 0, longStrBufLen); + charRefBufLen = other.charRefBufLen; + System.arraycopy(other.charRefBuf, 0, charRefBuf, 0, charRefBufLen); stateSave = other.stateSave; returnStateSave = other.returnStateSave; @@ -6714,8 +6678,7 @@ public class Tokenizer implements Locator { lo = other.lo; hi = other.hi; candidate = other.candidate; - strBufMark = other.strBufMark; - prevValue = other.prevValue; + charRefBufMark = other.charRefBufMark; value = other.value; seenDigits = other.seenDigits; endTag = other.endTag; @@ -6770,8 +6733,7 @@ public class Tokenizer implements Locator { public void initializeWithoutStarting() throws SAXException { confident = false; - strBuf = new char[64]; - longStrBuf = new char[1024]; + strBuf = null; line = 1; // 
[NOCPP[ html4 = false; diff --git a/parser/html/javasrc/TreeBuilder.java b/parser/html/javasrc/TreeBuilder.java index 7033da54a9..bcc4fa8d22 100644 --- a/parser/html/javasrc/TreeBuilder.java +++ b/parser/html/javasrc/TreeBuilder.java @@ -602,7 +602,7 @@ public abstract class TreeBuilder implements TokenHandler, // ]NOCPP] start(fragment); charBufferLen = 0; - charBuffer = new char[1024]; + charBuffer = null; framesetOk = true; if (fragment) { T elt; @@ -5590,14 +5590,30 @@ public abstract class TreeBuilder implements TokenHandler, private final void accumulateCharactersForced(@Const @NoLength char[] buf, int start, int length) throws SAXException { - int newLen = charBufferLen + length; - if (newLen > charBuffer.length) { - char[] newBuf = new char[newLen]; + System.arraycopy(buf, start, charBuffer, charBufferLen, length); + charBufferLen += length; + } + + @Override public void ensureBufferSpace(int inputLength) + throws SAXException { + // TODO: Unify Tokenizer.strBuf and TreeBuilder.charBuffer so that + // this method becomes unnecessary. + int worstCase = charBufferLen + inputLength; + if (charBuffer == null) { + // Add an arbitrary small value to avoid immediate reallocation + // once there are a few characters in the buffer. + charBuffer = new char[worstCase + 128]; + } else if (worstCase > charBuffer.length) { + // HotSpot reportedly allocates memory with 8-byte accuracy, so + // there's no point in trying to do math here to avoid slop. + // Maybe we should add some small constant to worstCase here + // but not doing that without profiling. In C++ with jemalloc, + // the corresponding method should do math to round up here + // to avoid slop. 
+ char[] newBuf = new char[worstCase]; System.arraycopy(charBuffer, 0, newBuf, 0, charBufferLen); charBuffer = newBuf; } - System.arraycopy(buf, start, charBuffer, charBufferLen, length); - charBufferLen = newLen; } // ]NOCPP] diff --git a/parser/html/javasrc/UTF16Buffer.java b/parser/html/javasrc/UTF16Buffer.java index d73a80004e..ec79185ec2 100644 --- a/parser/html/javasrc/UTF16Buffer.java +++ b/parser/html/javasrc/UTF16Buffer.java @@ -117,6 +117,15 @@ public final class UTF16Buffer { return start < end; } + /** + * Returns end - start. + * + * @return end - start + */ + public int getLength() { + return end - start; + } + /** * Adjusts the start index to skip over the first character if it is a line * feed and the previous character was a carriage return. diff --git a/parser/html/nsHtml5Parser.cpp b/parser/html/nsHtml5Parser.cpp index 74800a7cc9..f27ac34d0a 100644 --- a/parser/html/nsHtml5Parser.cpp +++ b/parser/html/nsHtml5Parser.cpp @@ -370,6 +370,9 @@ nsHtml5Parser::Parse(const nsAString& aSourceBuffer, lineNumberSave = mTokenizer->getLineNumber(); } + if (!mTokenizer->EnsureBufferSpace(stackBuffer.getLength())) { + return mExecutor->MarkAsBroken(NS_ERROR_OUT_OF_MEMORY); + } mLastWasCR = mTokenizer->tokenizeBuffer(&stackBuffer); if (inRootContext) { @@ -476,6 +479,10 @@ nsHtml5Parser::Parse(const nsAString& aSourceBuffer, while (stackBuffer.hasMore()) { stackBuffer.adjust(mDocWriteSpeculativeLastWasCR); if (stackBuffer.hasMore()) { + if (!mDocWriteSpeculativeTokenizer->EnsureBufferSpace( + stackBuffer.getLength())) { + return mExecutor->MarkAsBroken(NS_ERROR_OUT_OF_MEMORY); + } mDocWriteSpeculativeLastWasCR = mDocWriteSpeculativeTokenizer->tokenizeBuffer(&stackBuffer); } @@ -613,16 +620,19 @@ nsHtml5Parser::ParseUntilBlocked() return NS_OK; } if (mDocumentClosed) { + nsresult rv; NS_ASSERTION(!GetStreamParser(), "This should only happen with script-created parser."); - mTokenizer->eof(); - mTreeBuilder->StreamEnded(); + if (NS_SUCCEEDED((rv = 
mExecutor->IsBroken()))) { + mTokenizer->eof(); + mTreeBuilder->StreamEnded(); + } mTreeBuilder->Flush(); mExecutor->FlushDocumentWrite(); // The below call does memory cleanup, so call it even if the // parser has been marked as broken. mTokenizer->end(); - return NS_OK; + return rv; } // never release the last buffer. NS_ASSERTION(!mLastBuffer->getStart() && !mLastBuffer->getEnd(), @@ -662,6 +672,9 @@ nsHtml5Parser::ParseUntilBlocked() if (inRootContext) { mTokenizer->setLineNumber(mRootContextLineNumber); } + if (!mTokenizer->EnsureBufferSpace(mFirstBuffer->getLength())) { + return mExecutor->MarkAsBroken(NS_ERROR_OUT_OF_MEMORY); + } mLastWasCR = mTokenizer->tokenizeBuffer(mFirstBuffer); if (inRootContext) { mRootContextLineNumber = mTokenizer->getLineNumber(); diff --git a/parser/html/nsHtml5StreamParser.cpp b/parser/html/nsHtml5StreamParser.cpp index 2ff1896cbe..6d34c54e81 100644 --- a/parser/html/nsHtml5StreamParser.cpp +++ b/parser/html/nsHtml5StreamParser.cpp @@ -890,7 +890,10 @@ nsHtml5StreamParser::OnStartRequest(nsIRequest* aRequest, nsISupports* aContext) mTreeBuilder->StartPlainText(); mTokenizer->StartPlainText(); } else if (mMode == VIEW_SOURCE_PLAIN) { - mTreeBuilder->StartPlainTextViewSource(NS_ConvertUTF8toUTF16(mViewSourceTitle)); + nsAutoString viewSourceTitle; + CopyUTF8toUTF16(mViewSourceTitle, viewSourceTitle); + mTreeBuilder->EnsureBufferSpace(viewSourceTitle.Length()); + mTreeBuilder->StartPlainTextViewSource(viewSourceTitle); mTokenizer->StartPlainText(); } @@ -1364,10 +1367,12 @@ nsHtml5StreamParser::ParseAvailableData() 0); } } - mTokenizer->eof(); - mTreeBuilder->StreamEnded(); - if (mMode == VIEW_SOURCE_HTML || mMode == VIEW_SOURCE_XML) { - mTokenizer->EndViewSource(); + if (NS_SUCCEEDED(mTreeBuilder->IsBroken())) { + mTokenizer->eof(); + mTreeBuilder->StreamEnded(); + if (mMode == VIEW_SOURCE_HTML || mMode == VIEW_SOURCE_XML) { + mTokenizer->EndViewSource(); + } } FlushTreeOpsAndDisarmTimer(); return; // no more data and not expecting 
more @@ -1384,6 +1389,10 @@ nsHtml5StreamParser::ParseAvailableData() mFirstBuffer->adjust(mLastWasCR); mLastWasCR = false; if (mFirstBuffer->hasMore()) { + if (!mTokenizer->EnsureBufferSpace(mFirstBuffer->getLength())) { + MarkAsBroken(NS_ERROR_OUT_OF_MEMORY); + return; + } mLastWasCR = mTokenizer->tokenizeBuffer(mFirstBuffer); // At this point, internalEncodingDeclaration() may have called // Terminate, but that never happens together with script. diff --git a/parser/html/nsHtml5StringParser.cpp b/parser/html/nsHtml5StringParser.cpp index 70f63571af..e3b49c3305 100644 --- a/parser/html/nsHtml5StringParser.cpp +++ b/parser/html/nsHtml5StringParser.cpp @@ -109,6 +109,10 @@ nsHtml5StringParser::Tokenize(const nsAString& aSourceBuffer, buffer.adjust(lastWasCR); lastWasCR = false; if (buffer.hasMore()) { + if (!mTokenizer->EnsureBufferSpace(buffer.getLength())) { + rv = mBuilder->MarkAsBroken(NS_ERROR_OUT_OF_MEMORY); + break; + } lastWasCR = mTokenizer->tokenizeBuffer(&buffer); if (NS_FAILED(rv = mBuilder->IsBroken())) { break; @@ -116,7 +120,9 @@ nsHtml5StringParser::Tokenize(const nsAString& aSourceBuffer, } } } - mTokenizer->eof(); + if (NS_SUCCEEDED(rv)) { + mTokenizer->eof(); + } mTokenizer->end(); mBuilder->Finish(); mAtomTable.Clear(); diff --git a/parser/html/nsHtml5Tokenizer.cpp b/parser/html/nsHtml5Tokenizer.cpp index d40c3a4220..369ba59dab 100644 --- a/parser/html/nsHtml5Tokenizer.cpp +++ b/parser/html/nsHtml5Tokenizer.cpp @@ -91,6 +91,7 @@ staticJArray nsHtml5Tokenizer::NOFRAMES_ARR = { NOFRAMES_ARR_D nsHtml5Tokenizer::nsHtml5Tokenizer(nsHtml5TreeBuilder* tokenHandler, bool viewingXmlSource) : tokenHandler(tokenHandler), encodingDeclarationHandler(nullptr), + charRefBuf(jArray::newJArray(32)), bmpChar(jArray::newJArray(1)), astralChar(jArray::newJArray(2)), tagName(nullptr), @@ -207,14 +208,21 @@ nsHtml5Tokenizer::emptyAttributes() return nsHtml5HtmlAttributes::EMPTY_ATTRIBUTES; } +void +nsHtml5Tokenizer::emitOrAppendCharRefBuf(int32_t returnState) +{ + if 
((returnState & NS_HTML5TOKENIZER_DATA_AND_RCDATA_MASK)) { + appendCharRefBufToStrBuf(); + } else { + if (charRefBufLen > 0) { + tokenHandler->characters(charRefBuf, 0, charRefBufLen); + } + } +} + void nsHtml5Tokenizer::appendStrBuf(char16_t c) { - if (strBufLen == strBuf.length) { - jArray newBuf = jArray::newJArray(strBuf.length + NS_HTML5TOKENIZER_BUFFER_GROW_BY); - nsHtml5ArrayCopy::arraycopy(strBuf, newBuf, strBuf.length); - strBuf = newBuf; - } strBuf[strBufLen++] = c; } @@ -239,39 +247,22 @@ nsHtml5Tokenizer::emitStrBuf() } void -nsHtml5Tokenizer::appendLongStrBuf(char16_t c) +nsHtml5Tokenizer::appendStrBuf(char16_t* buffer, int32_t offset, int32_t length) { - if (longStrBufLen == longStrBuf.length) { - jArray newBuf = jArray::newJArray(longStrBufLen + (longStrBufLen >> 1)); - nsHtml5ArrayCopy::arraycopy(longStrBuf, newBuf, longStrBuf.length); - longStrBuf = newBuf; - } - longStrBuf[longStrBufLen++] = c; -} - -void -nsHtml5Tokenizer::appendLongStrBuf(char16_t* buffer, int32_t offset, int32_t length) -{ - int32_t reqLen = longStrBufLen + length; - if (longStrBuf.length < reqLen) { + int32_t reqLen = strBufLen + length; + if (strBuf.length < reqLen) { jArray newBuf = jArray::newJArray(reqLen + (reqLen >> 1)); - nsHtml5ArrayCopy::arraycopy(longStrBuf, newBuf, longStrBuf.length); - longStrBuf = newBuf; + nsHtml5ArrayCopy::arraycopy(strBuf, newBuf, strBuf.length); + strBuf = newBuf; } - nsHtml5ArrayCopy::arraycopy(buffer, offset, longStrBuf, longStrBufLen, length); - longStrBufLen = reqLen; -} - -nsString* -nsHtml5Tokenizer::longStrBufToString() -{ - return nsHtml5Portability::newStringFromBuffer(longStrBuf, 0, longStrBufLen); + nsHtml5ArrayCopy::arraycopy(buffer, offset, strBuf, strBufLen, length); + strBufLen = reqLen; } void nsHtml5Tokenizer::emitComment(int32_t provisionalHyphens, int32_t pos) { - tokenHandler->comment(longStrBuf, 0, longStrBufLen - provisionalHyphens); + tokenHandler->comment(strBuf, 0, strBufLen - provisionalHyphens); cstart = pos + 1; } 
@@ -353,7 +344,7 @@ void nsHtml5Tokenizer::addAttributeWithValue() { if (attributeName) { - nsString* val = longStrBufToString(); + nsString* val = strBufToString(); if (mViewSource) { mViewSource->MaybeLinkifyAttributeValue(attributeName, val); } @@ -378,6 +369,7 @@ nsHtml5Tokenizer::tokenizeBuffer(nsHtml5UTF16Buffer* buffer) shouldSuspend = false; lastCR = false; int32_t start = buffer->getStart(); + int32_t end = buffer->getEnd(); int32_t pos = start - 1; switch(state) { case NS_HTML5TOKENIZER_DATA: @@ -412,7 +404,7 @@ nsHtml5Tokenizer::tokenizeBuffer(nsHtml5UTF16Buffer* buffer) } else { pos = stateLoop(state, c, pos, buffer->getBuffer(), false, returnState, buffer->getEnd()); } - if (pos == buffer->getEnd()) { + if (pos == end) { buffer->setStart(pos); } else { buffer->setStart(pos + 1); @@ -439,7 +431,7 @@ nsHtml5Tokenizer::stateLoop(int32_t state, char16_t c, int32_t pos, char16_t* bu switch(c) { case '&': { flushChars(buf, pos); - clearStrBufAndAppend(c); + clearCharRefBufAndAppend(c); setAdditionalAndRememberAmpersandLocation('\0'); returnState = state; state = P::transition(mViewSource, NS_HTML5TOKENIZER_CONSUME_CHARACTER_REFERENCE, reconsume, pos); @@ -502,7 +494,7 @@ nsHtml5Tokenizer::stateLoop(int32_t state, char16_t c, int32_t pos, char16_t* bu if (P::reportErrors) { errProcessingInstruction(); } - clearLongStrBufAndAppend(c); + clearStrBufAndAppend(c); state = P::transition(mViewSource, NS_HTML5TOKENIZER_BOGUS_COMMENT, reconsume, pos); NS_HTML5_CONTINUE(stateloop); } @@ -720,19 +712,19 @@ nsHtml5Tokenizer::stateLoop(int32_t state, char16_t c, int32_t pos, char16_t* bu continue; } case '\"': { - clearLongStrBuf(); + clearStrBuf(); state = P::transition(mViewSource, NS_HTML5TOKENIZER_ATTRIBUTE_VALUE_DOUBLE_QUOTED, reconsume, pos); NS_HTML5_BREAK(beforeattributevalueloop); } case '&': { - clearLongStrBuf(); + clearStrBuf(); reconsume = true; state = P::transition(mViewSource, NS_HTML5TOKENIZER_ATTRIBUTE_VALUE_UNQUOTED, reconsume, pos); 
NS_HTML5_CONTINUE(stateloop); } case '\'': { - clearLongStrBuf(); + clearStrBuf(); state = P::transition(mViewSource, NS_HTML5TOKENIZER_ATTRIBUTE_VALUE_SINGLE_QUOTED, reconsume, pos); NS_HTML5_CONTINUE(stateloop); } @@ -758,7 +750,7 @@ nsHtml5Tokenizer::stateLoop(int32_t state, char16_t c, int32_t pos, char16_t* bu } } default: { - clearLongStrBufAndAppend(c); + clearStrBufAndAppend(c); state = P::transition(mViewSource, NS_HTML5TOKENIZER_ATTRIBUTE_VALUE_UNQUOTED, reconsume, pos); NS_HTML5_CONTINUE(stateloop); @@ -784,25 +776,25 @@ nsHtml5Tokenizer::stateLoop(int32_t state, char16_t c, int32_t pos, char16_t* bu NS_HTML5_BREAK(attributevaluedoublequotedloop); } case '&': { - clearStrBufAndAppend(c); + clearCharRefBufAndAppend(c); setAdditionalAndRememberAmpersandLocation('\"'); returnState = state; state = P::transition(mViewSource, NS_HTML5TOKENIZER_CONSUME_CHARACTER_REFERENCE, reconsume, pos); NS_HTML5_CONTINUE(stateloop); } case '\r': { - appendLongStrBufCarriageReturn(); + appendStrBufCarriageReturn(); NS_HTML5_BREAK(stateloop); } case '\n': { - appendLongStrBufLineFeed(); + appendStrBufLineFeed(); continue; } case '\0': { c = 0xfffd; } default: { - appendLongStrBuf(c); + appendStrBuf(c); continue; } } @@ -904,7 +896,7 @@ nsHtml5Tokenizer::stateLoop(int32_t state, char16_t c, int32_t pos, char16_t* bu NS_HTML5_CONTINUE(stateloop); } case '&': { - clearStrBufAndAppend(c); + clearCharRefBufAndAppend(c); setAdditionalAndRememberAmpersandLocation('>'); returnState = state; state = P::transition(mViewSource, NS_HTML5TOKENIZER_CONSUME_CHARACTER_REFERENCE, reconsume, pos); @@ -932,7 +924,7 @@ nsHtml5Tokenizer::stateLoop(int32_t state, char16_t c, int32_t pos, char16_t* bu } default: { - appendLongStrBuf(c); + appendStrBuf(c); continue; } } @@ -1004,20 +996,20 @@ nsHtml5Tokenizer::stateLoop(int32_t state, char16_t c, int32_t pos, char16_t* bu c = checkChar(buf, pos); switch(c) { case '-': { - clearLongStrBufAndAppend(c); + clearStrBufAndAppend(c); state = 
P::transition(mViewSource, NS_HTML5TOKENIZER_MARKUP_DECLARATION_HYPHEN, reconsume, pos); NS_HTML5_BREAK(markupdeclarationopenloop); } case 'd': case 'D': { - clearLongStrBufAndAppend(c); + clearStrBufAndAppend(c); index = 0; state = P::transition(mViewSource, NS_HTML5TOKENIZER_MARKUP_DECLARATION_OCTYPE, reconsume, pos); NS_HTML5_CONTINUE(stateloop); } case '[': { if (tokenHandler->cdataSectionAllowed()) { - clearLongStrBufAndAppend(c); + clearStrBufAndAppend(c); index = 0; state = P::transition(mViewSource, NS_HTML5TOKENIZER_CDATA_START, reconsume, pos); NS_HTML5_CONTINUE(stateloop); @@ -1027,7 +1019,7 @@ nsHtml5Tokenizer::stateLoop(int32_t state, char16_t c, int32_t pos, char16_t* bu if (P::reportErrors) { errBogusComment(); } - clearLongStrBuf(); + clearStrBuf(); reconsume = true; state = P::transition(mViewSource, NS_HTML5TOKENIZER_BOGUS_COMMENT, reconsume, pos); NS_HTML5_CONTINUE(stateloop); @@ -1047,7 +1039,7 @@ nsHtml5Tokenizer::stateLoop(int32_t state, char16_t c, int32_t pos, char16_t* bu NS_HTML5_BREAK(stateloop); } case '-': { - clearLongStrBuf(); + clearStrBuf(); state = P::transition(mViewSource, NS_HTML5TOKENIZER_COMMENT_START, reconsume, pos); NS_HTML5_BREAK(markupdeclarationhyphenloop); } @@ -1071,7 +1063,7 @@ nsHtml5Tokenizer::stateLoop(int32_t state, char16_t c, int32_t pos, char16_t* bu c = checkChar(buf, pos); switch(c) { case '-': { - appendLongStrBuf(c); + appendStrBuf(c); state = P::transition(mViewSource, NS_HTML5TOKENIZER_COMMENT_START_DASH, reconsume, pos); NS_HTML5_CONTINUE(stateloop); } @@ -1084,12 +1076,12 @@ nsHtml5Tokenizer::stateLoop(int32_t state, char16_t c, int32_t pos, char16_t* bu NS_HTML5_CONTINUE(stateloop); } case '\r': { - appendLongStrBufCarriageReturn(); + appendStrBufCarriageReturn(); state = P::transition(mViewSource, NS_HTML5TOKENIZER_COMMENT, reconsume, pos); NS_HTML5_BREAK(stateloop); } case '\n': { - appendLongStrBufLineFeed(); + appendStrBufLineFeed(); state = P::transition(mViewSource, NS_HTML5TOKENIZER_COMMENT, 
reconsume, pos); NS_HTML5_BREAK(commentstartloop); } @@ -1097,7 +1089,7 @@ nsHtml5Tokenizer::stateLoop(int32_t state, char16_t c, int32_t pos, char16_t* bu c = 0xfffd; } default: { - appendLongStrBuf(c); + appendStrBuf(c); state = P::transition(mViewSource, NS_HTML5TOKENIZER_COMMENT, reconsume, pos); NS_HTML5_BREAK(commentstartloop); } @@ -1113,23 +1105,23 @@ nsHtml5Tokenizer::stateLoop(int32_t state, char16_t c, int32_t pos, char16_t* bu c = checkChar(buf, pos); switch(c) { case '-': { - appendLongStrBuf(c); + appendStrBuf(c); state = P::transition(mViewSource, NS_HTML5TOKENIZER_COMMENT_END_DASH, reconsume, pos); NS_HTML5_BREAK(commentloop); } case '\r': { - appendLongStrBufCarriageReturn(); + appendStrBufCarriageReturn(); NS_HTML5_BREAK(stateloop); } case '\n': { - appendLongStrBufLineFeed(); + appendStrBufLineFeed(); continue; } case '\0': { c = 0xfffd; } default: { - appendLongStrBuf(c); + appendStrBuf(c); continue; } } @@ -1144,17 +1136,17 @@ nsHtml5Tokenizer::stateLoop(int32_t state, char16_t c, int32_t pos, char16_t* bu c = checkChar(buf, pos); switch(c) { case '-': { - appendLongStrBuf(c); + appendStrBuf(c); state = P::transition(mViewSource, NS_HTML5TOKENIZER_COMMENT_END, reconsume, pos); NS_HTML5_BREAK(commentenddashloop); } case '\r': { - appendLongStrBufCarriageReturn(); + appendStrBufCarriageReturn(); state = P::transition(mViewSource, NS_HTML5TOKENIZER_COMMENT, reconsume, pos); NS_HTML5_BREAK(stateloop); } case '\n': { - appendLongStrBufLineFeed(); + appendStrBufLineFeed(); state = P::transition(mViewSource, NS_HTML5TOKENIZER_COMMENT, reconsume, pos); NS_HTML5_CONTINUE(stateloop); } @@ -1162,7 +1154,7 @@ nsHtml5Tokenizer::stateLoop(int32_t state, char16_t c, int32_t pos, char16_t* bu c = 0xfffd; } default: { - appendLongStrBuf(c); + appendStrBuf(c); state = P::transition(mViewSource, NS_HTML5TOKENIZER_COMMENT, reconsume, pos); NS_HTML5_CONTINUE(stateloop); } @@ -1183,16 +1175,16 @@ nsHtml5Tokenizer::stateLoop(int32_t state, char16_t c, int32_t pos, 
char16_t* bu NS_HTML5_CONTINUE(stateloop); } case '-': { - adjustDoubleHyphenAndAppendToLongStrBufAndErr(c); + adjustDoubleHyphenAndAppendToStrBufAndErr(c); continue; } case '\r': { - adjustDoubleHyphenAndAppendToLongStrBufCarriageReturn(); + adjustDoubleHyphenAndAppendToStrBufCarriageReturn(); state = P::transition(mViewSource, NS_HTML5TOKENIZER_COMMENT, reconsume, pos); NS_HTML5_BREAK(stateloop); } case '\n': { - adjustDoubleHyphenAndAppendToLongStrBufLineFeed(); + adjustDoubleHyphenAndAppendToStrBufLineFeed(); state = P::transition(mViewSource, NS_HTML5TOKENIZER_COMMENT, reconsume, pos); NS_HTML5_CONTINUE(stateloop); } @@ -1200,7 +1192,7 @@ nsHtml5Tokenizer::stateLoop(int32_t state, char16_t c, int32_t pos, char16_t* bu if (P::reportErrors) { errHyphenHyphenBang(); } - appendLongStrBuf(c); + appendStrBuf(c); state = P::transition(mViewSource, NS_HTML5TOKENIZER_COMMENT_END_BANG, reconsume, pos); NS_HTML5_CONTINUE(stateloop); } @@ -1208,7 +1200,7 @@ nsHtml5Tokenizer::stateLoop(int32_t state, char16_t c, int32_t pos, char16_t* bu c = 0xfffd; } default: { - adjustDoubleHyphenAndAppendToLongStrBufAndErr(c); + adjustDoubleHyphenAndAppendToStrBufAndErr(c); state = P::transition(mViewSource, NS_HTML5TOKENIZER_COMMENT, reconsume, pos); NS_HTML5_CONTINUE(stateloop); } @@ -1229,23 +1221,23 @@ nsHtml5Tokenizer::stateLoop(int32_t state, char16_t c, int32_t pos, char16_t* bu NS_HTML5_CONTINUE(stateloop); } case '-': { - appendLongStrBuf(c); + appendStrBuf(c); state = P::transition(mViewSource, NS_HTML5TOKENIZER_COMMENT_END_DASH, reconsume, pos); NS_HTML5_CONTINUE(stateloop); } case '\r': { - appendLongStrBufCarriageReturn(); + appendStrBufCarriageReturn(); NS_HTML5_BREAK(stateloop); } case '\n': { - appendLongStrBufLineFeed(); + appendStrBufLineFeed(); continue; } case '\0': { c = 0xfffd; } default: { - appendLongStrBuf(c); + appendStrBuf(c); state = P::transition(mViewSource, NS_HTML5TOKENIZER_COMMENT, reconsume, pos); NS_HTML5_CONTINUE(stateloop); } @@ -1259,7 +1251,7 @@ 
nsHtml5Tokenizer::stateLoop(int32_t state, char16_t c, int32_t pos, char16_t* bu c = checkChar(buf, pos); switch(c) { case '-': { - appendLongStrBuf(c); + appendStrBuf(c); state = P::transition(mViewSource, NS_HTML5TOKENIZER_COMMENT_END, reconsume, pos); NS_HTML5_CONTINUE(stateloop); } @@ -1272,12 +1264,12 @@ nsHtml5Tokenizer::stateLoop(int32_t state, char16_t c, int32_t pos, char16_t* bu NS_HTML5_CONTINUE(stateloop); } case '\r': { - appendLongStrBufCarriageReturn(); + appendStrBufCarriageReturn(); state = P::transition(mViewSource, NS_HTML5TOKENIZER_COMMENT, reconsume, pos); NS_HTML5_BREAK(stateloop); } case '\n': { - appendLongStrBufLineFeed(); + appendStrBufLineFeed(); state = P::transition(mViewSource, NS_HTML5TOKENIZER_COMMENT, reconsume, pos); NS_HTML5_CONTINUE(stateloop); } @@ -1285,7 +1277,7 @@ nsHtml5Tokenizer::stateLoop(int32_t state, char16_t c, int32_t pos, char16_t* bu c = 0xfffd; } default: { - appendLongStrBuf(c); + appendStrBuf(c); state = P::transition(mViewSource, NS_HTML5TOKENIZER_COMMENT, reconsume, pos); NS_HTML5_CONTINUE(stateloop); } @@ -1299,7 +1291,7 @@ nsHtml5Tokenizer::stateLoop(int32_t state, char16_t c, int32_t pos, char16_t* bu c = checkChar(buf, pos); if (index < 6) { if (c == nsHtml5Tokenizer::CDATA_LSQB[index]) { - appendLongStrBuf(c); + appendStrBuf(c); } else { if (P::reportErrors) { errBogusComment(); @@ -1418,25 +1410,25 @@ nsHtml5Tokenizer::stateLoop(int32_t state, char16_t c, int32_t pos, char16_t* bu NS_HTML5_CONTINUE(stateloop); } case '&': { - clearStrBufAndAppend(c); + clearCharRefBufAndAppend(c); setAdditionalAndRememberAmpersandLocation('\''); returnState = state; state = P::transition(mViewSource, NS_HTML5TOKENIZER_CONSUME_CHARACTER_REFERENCE, reconsume, pos); NS_HTML5_BREAK(attributevaluesinglequotedloop); } case '\r': { - appendLongStrBufCarriageReturn(); + appendStrBufCarriageReturn(); NS_HTML5_BREAK(stateloop); } case '\n': { - appendLongStrBufLineFeed(); + appendStrBufLineFeed(); continue; } case '\0': { c = 
0xfffd; } default: { - appendLongStrBuf(c); + appendStrBuf(c); continue; } } @@ -1459,7 +1451,7 @@ nsHtml5Tokenizer::stateLoop(int32_t state, char16_t c, int32_t pos, char16_t* bu case '\f': case '<': case '&': { - emitOrAppendStrBuf(returnState); + emitOrAppendCharRefBuf(returnState); if (!(returnState & NS_HTML5TOKENIZER_DATA_AND_RCDATA_MASK)) { cstart = pos; } @@ -1468,13 +1460,13 @@ nsHtml5Tokenizer::stateLoop(int32_t state, char16_t c, int32_t pos, char16_t* bu NS_HTML5_CONTINUE(stateloop); } case '#': { - appendStrBuf('#'); + appendCharRefBuf('#'); state = P::transition(mViewSource, NS_HTML5TOKENIZER_CONSUME_NCR, reconsume, pos); NS_HTML5_CONTINUE(stateloop); } default: { if (c == additional) { - emitOrAppendStrBuf(returnState); + emitOrAppendCharRefBuf(returnState); reconsume = true; state = P::transition(mViewSource, returnState, reconsume, pos); NS_HTML5_CONTINUE(stateloop); @@ -1487,7 +1479,7 @@ nsHtml5Tokenizer::stateLoop(int32_t state, char16_t c, int32_t pos, char16_t* bu if (P::reportErrors) { errNoNamedCharacterMatch(); } - emitOrAppendStrBuf(returnState); + emitOrAppendCharRefBuf(returnState); if (!(returnState & NS_HTML5TOKENIZER_DATA_AND_RCDATA_MASK)) { cstart = pos; } @@ -1495,7 +1487,7 @@ nsHtml5Tokenizer::stateLoop(int32_t state, char16_t c, int32_t pos, char16_t* bu state = P::transition(mViewSource, returnState, reconsume, pos); NS_HTML5_CONTINUE(stateloop); } - appendStrBuf(c); + appendCharRefBuf(c); state = P::transition(mViewSource, NS_HTML5TOKENIZER_CHARACTER_REFERENCE_HILO_LOOKUP, reconsume, pos); } } @@ -1520,7 +1512,7 @@ nsHtml5Tokenizer::stateLoop(int32_t state, char16_t c, int32_t pos, char16_t* bu if (P::reportErrors) { errNoNamedCharacterMatch(); } - emitOrAppendStrBuf(returnState); + emitOrAppendCharRefBuf(returnState); if (!(returnState & NS_HTML5TOKENIZER_DATA_AND_RCDATA_MASK)) { cstart = pos; } @@ -1528,12 +1520,12 @@ nsHtml5Tokenizer::stateLoop(int32_t state, char16_t c, int32_t pos, char16_t* bu state = 
P::transition(mViewSource, returnState, reconsume, pos); NS_HTML5_CONTINUE(stateloop); } - appendStrBuf(c); + appendCharRefBuf(c); lo = hilo & 0xFFFF; hi = hilo >> 16; entCol = -1; candidate = -1; - strBufMark = 0; + charRefBufMark = 0; state = P::transition(mViewSource, NS_HTML5TOKENIZER_CHARACTER_REFERENCE_TAIL, reconsume, pos); } } @@ -1553,7 +1545,7 @@ nsHtml5Tokenizer::stateLoop(int32_t state, char16_t c, int32_t pos, char16_t* bu } if (entCol == nsHtml5NamedCharacters::NAMES[lo].length()) { candidate = lo; - strBufMark = strBufLen; + charRefBufMark = charRefBufLen; lo++; } else if (entCol > nsHtml5NamedCharacters::NAMES[lo].length()) { NS_HTML5_BREAK(outer); @@ -1583,14 +1575,14 @@ nsHtml5Tokenizer::stateLoop(int32_t state, char16_t c, int32_t pos, char16_t* bu if (c == ';') { if (entCol + 1 == nsHtml5NamedCharacters::NAMES[lo].length()) { candidate = lo; - strBufMark = strBufLen; + charRefBufMark = charRefBufLen; } NS_HTML5_BREAK(outer); } if (hi < lo) { NS_HTML5_BREAK(outer); } - appendStrBuf(c); + appendCharRefBuf(c); continue; } outer_end: ; @@ -1598,7 +1590,7 @@ nsHtml5Tokenizer::stateLoop(int32_t state, char16_t c, int32_t pos, char16_t* bu if (P::reportErrors) { errNoNamedCharacterMatch(); } - emitOrAppendStrBuf(returnState); + emitOrAppendCharRefBuf(returnState); if (!(returnState & NS_HTML5TOKENIZER_DATA_AND_RCDATA_MASK)) { cstart = pos; } @@ -1610,16 +1602,16 @@ nsHtml5Tokenizer::stateLoop(int32_t state, char16_t c, int32_t pos, char16_t* bu if (!candidateName.length() || candidateName.charAt(candidateName.length() - 1) != ';') { if ((returnState & NS_HTML5TOKENIZER_DATA_AND_RCDATA_MASK)) { char16_t ch; - if (strBufMark == strBufLen) { + if (charRefBufMark == charRefBufLen) { ch = c; } else { - ch = strBuf[strBufMark]; + ch = charRefBuf[charRefBufMark]; } if (ch == '=' || (ch >= '0' && ch <= '9') || (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z')) { if (P::reportErrors) { errNoNamedCharacterMatch(); } - appendStrBufToLongStrBuf(); + 
appendCharRefBufToStrBuf(); reconsume = true; state = P::transition(mViewSource, returnState, reconsume, pos); NS_HTML5_CONTINUE(stateloop); @@ -1642,16 +1634,14 @@ nsHtml5Tokenizer::stateLoop(int32_t state, char16_t c, int32_t pos, char16_t* bu } else { emitOrAppendTwo(val, returnState); } - if (strBufMark < strBufLen) { + if (charRefBufMark < charRefBufLen) { if ((returnState & NS_HTML5TOKENIZER_DATA_AND_RCDATA_MASK)) { - for (int32_t i = strBufMark; i < strBufLen; i++) { - appendLongStrBuf(strBuf[i]); - } + appendStrBuf(charRefBuf, charRefBufMark, charRefBufLen - charRefBufMark); } else { - tokenHandler->characters(strBuf, strBufMark, strBufLen - strBufMark); + tokenHandler->characters(charRefBuf, charRefBufMark, charRefBufLen - charRefBufMark); } } - bool earlyBreak = (c == ';' && strBufMark == strBufLen); + bool earlyBreak = (c == ';' && charRefBufMark == charRefBufLen); if (!(returnState & NS_HTML5TOKENIZER_DATA_AND_RCDATA_MASK)) { cstart = earlyBreak ? pos + 1 : pos; } @@ -1665,13 +1655,12 @@ nsHtml5Tokenizer::stateLoop(int32_t state, char16_t c, int32_t pos, char16_t* bu NS_HTML5_BREAK(stateloop); } c = checkChar(buf, pos); - prevValue = -1; value = 0; seenDigits = false; switch(c) { case 'x': case 'X': { - appendStrBuf(c); + appendCharRefBuf(c); state = P::transition(mViewSource, NS_HTML5TOKENIZER_HEX_NCR_LOOP, reconsume, pos); NS_HTML5_CONTINUE(stateloop); } @@ -1691,14 +1680,13 @@ nsHtml5Tokenizer::stateLoop(int32_t state, char16_t c, int32_t pos, char16_t* bu } c = checkChar(buf, pos); } - if (value < prevValue) { - value = 0x110000; - } - prevValue = value; + MOZ_ASSERT(value >= 0, "value must not become negative."); if (c >= '0' && c <= '9') { seenDigits = true; - value *= 10; - value += c - '0'; + if (value <= 0x10FFFF) { + value *= 10; + value += c - '0'; + } continue; } else if (c == ';') { if (seenDigits) { @@ -1711,8 +1699,8 @@ nsHtml5Tokenizer::stateLoop(int32_t state, char16_t c, int32_t pos, char16_t* bu if (P::reportErrors) { 
errNoDigitsInNCR(); } - appendStrBuf(';'); - emitOrAppendStrBuf(returnState); + appendCharRefBuf(';'); + emitOrAppendCharRefBuf(returnState); if (!(returnState & NS_HTML5TOKENIZER_DATA_AND_RCDATA_MASK)) { cstart = pos + 1; } @@ -1724,7 +1712,7 @@ nsHtml5Tokenizer::stateLoop(int32_t state, char16_t c, int32_t pos, char16_t* bu if (P::reportErrors) { errNoDigitsInNCR(); } - emitOrAppendStrBuf(returnState); + emitOrAppendCharRefBuf(returnState); if (!(returnState & NS_HTML5TOKENIZER_DATA_AND_RCDATA_MASK)) { cstart = pos; } @@ -1757,24 +1745,27 @@ nsHtml5Tokenizer::stateLoop(int32_t state, char16_t c, int32_t pos, char16_t* bu NS_HTML5_BREAK(stateloop); } c = checkChar(buf, pos); - if (value < prevValue) { - value = 0x110000; - } - prevValue = value; + MOZ_ASSERT(value >= 0, "value must not become negative."); if (c >= '0' && c <= '9') { seenDigits = true; - value *= 16; - value += c - '0'; + if (value <= 0x10FFFF) { + value *= 16; + value += c - '0'; + } continue; } else if (c >= 'A' && c <= 'F') { seenDigits = true; - value *= 16; - value += c - 'A' + 10; + if (value <= 0x10FFFF) { + value *= 16; + value += c - 'A' + 10; + } continue; } else if (c >= 'a' && c <= 'f') { seenDigits = true; - value *= 16; - value += c - 'a' + 10; + if (value <= 0x10FFFF) { + value *= 16; + value += c - 'a' + 10; + } continue; } else if (c == ';') { if (seenDigits) { @@ -1787,8 +1778,8 @@ nsHtml5Tokenizer::stateLoop(int32_t state, char16_t c, int32_t pos, char16_t* bu if (P::reportErrors) { errNoDigitsInNCR(); } - appendStrBuf(';'); - emitOrAppendStrBuf(returnState); + appendCharRefBuf(';'); + emitOrAppendCharRefBuf(returnState); if (!(returnState & NS_HTML5TOKENIZER_DATA_AND_RCDATA_MASK)) { cstart = pos + 1; } @@ -1800,7 +1791,7 @@ nsHtml5Tokenizer::stateLoop(int32_t state, char16_t c, int32_t pos, char16_t* bu if (P::reportErrors) { errNoDigitsInNCR(); } - emitOrAppendStrBuf(returnState); + emitOrAppendCharRefBuf(returnState); if (!(returnState & 
NS_HTML5TOKENIZER_DATA_AND_RCDATA_MASK)) { cstart = pos; } @@ -1869,7 +1860,7 @@ nsHtml5Tokenizer::stateLoop(int32_t state, char16_t c, int32_t pos, char16_t* bu if (P::reportErrors) { errGarbageAfterLtSlash(); } - clearLongStrBufAndAppend('\n'); + clearStrBufAndAppend('\n'); state = P::transition(mViewSource, NS_HTML5TOKENIZER_BOGUS_COMMENT, reconsume, pos); NS_HTML5_BREAK(stateloop); } @@ -1878,7 +1869,7 @@ nsHtml5Tokenizer::stateLoop(int32_t state, char16_t c, int32_t pos, char16_t* bu if (P::reportErrors) { errGarbageAfterLtSlash(); } - clearLongStrBufAndAppend('\n'); + clearStrBufAndAppend('\n'); state = P::transition(mViewSource, NS_HTML5TOKENIZER_BOGUS_COMMENT, reconsume, pos); NS_HTML5_CONTINUE(stateloop); } @@ -1898,7 +1889,7 @@ nsHtml5Tokenizer::stateLoop(int32_t state, char16_t c, int32_t pos, char16_t* bu if (P::reportErrors) { errGarbageAfterLtSlash(); } - clearLongStrBufAndAppend(c); + clearStrBufAndAppend(c); state = P::transition(mViewSource, NS_HTML5TOKENIZER_BOGUS_COMMENT, reconsume, pos); NS_HTML5_CONTINUE(stateloop); } @@ -1918,7 +1909,7 @@ nsHtml5Tokenizer::stateLoop(int32_t state, char16_t c, int32_t pos, char16_t* bu switch(c) { case '&': { flushChars(buf, pos); - clearStrBufAndAppend(c); + clearCharRefBufAndAppend(c); setAdditionalAndRememberAmpersandLocation('\0'); returnState = state; state = P::transition(mViewSource, NS_HTML5TOKENIZER_CONSUME_CHARACTER_REFERENCE, reconsume, pos); @@ -2091,23 +2082,23 @@ nsHtml5Tokenizer::stateLoop(int32_t state, char16_t c, int32_t pos, char16_t* bu NS_HTML5_CONTINUE(stateloop); } case '-': { - appendLongStrBuf(c); + appendStrBuf(c); state = P::transition(mViewSource, NS_HTML5TOKENIZER_BOGUS_COMMENT_HYPHEN, reconsume, pos); NS_HTML5_BREAK(boguscommentloop); } case '\r': { - appendLongStrBufCarriageReturn(); + appendStrBufCarriageReturn(); NS_HTML5_BREAK(stateloop); } case '\n': { - appendLongStrBufLineFeed(); + appendStrBufLineFeed(); continue; } case '\0': { c = 0xfffd; } default: { - 
appendLongStrBuf(c); + appendStrBuf(c); continue; } } @@ -2131,12 +2122,12 @@ nsHtml5Tokenizer::stateLoop(int32_t state, char16_t c, int32_t pos, char16_t* bu NS_HTML5_CONTINUE(boguscommenthyphenloop); } case '\r': { - appendLongStrBufCarriageReturn(); + appendStrBufCarriageReturn(); state = P::transition(mViewSource, NS_HTML5TOKENIZER_BOGUS_COMMENT, reconsume, pos); NS_HTML5_BREAK(stateloop); } case '\n': { - appendLongStrBufLineFeed(); + appendStrBufLineFeed(); state = P::transition(mViewSource, NS_HTML5TOKENIZER_BOGUS_COMMENT, reconsume, pos); NS_HTML5_CONTINUE(stateloop); } @@ -2144,7 +2135,7 @@ nsHtml5Tokenizer::stateLoop(int32_t state, char16_t c, int32_t pos, char16_t* bu c = 0xfffd; } default: { - appendLongStrBuf(c); + appendStrBuf(c); state = P::transition(mViewSource, NS_HTML5TOKENIZER_BOGUS_COMMENT, reconsume, pos); NS_HTML5_CONTINUE(stateloop); } @@ -2641,7 +2632,7 @@ nsHtml5Tokenizer::stateLoop(int32_t state, char16_t c, int32_t pos, char16_t* bu folded += 0x20; } if (folded == nsHtml5Tokenizer::OCTYPE[index]) { - appendLongStrBuf(c); + appendStrBuf(c); } else { if (P::reportErrors) { errBogusComment(); @@ -2889,7 +2880,7 @@ nsHtml5Tokenizer::stateLoop(int32_t state, char16_t c, int32_t pos, char16_t* bu if (P::reportErrors) { errNoSpaceBetweenDoctypePublicKeywordAndQuote(); } - clearLongStrBuf(); + clearStrBuf(); state = P::transition(mViewSource, NS_HTML5TOKENIZER_DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED, reconsume, pos); NS_HTML5_CONTINUE(stateloop); } @@ -2897,7 +2888,7 @@ nsHtml5Tokenizer::stateLoop(int32_t state, char16_t c, int32_t pos, char16_t* bu if (P::reportErrors) { errNoSpaceBetweenDoctypePublicKeywordAndQuote(); } - clearLongStrBuf(); + clearStrBuf(); state = P::transition(mViewSource, NS_HTML5TOKENIZER_DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED, reconsume, pos); NS_HTML5_CONTINUE(stateloop); } @@ -2939,12 +2930,12 @@ nsHtml5Tokenizer::stateLoop(int32_t state, char16_t c, int32_t pos, char16_t* bu continue; } case '\"': { - 
clearLongStrBuf(); + clearStrBuf(); state = P::transition(mViewSource, NS_HTML5TOKENIZER_DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED, reconsume, pos); NS_HTML5_BREAK(beforedoctypepublicidentifierloop); } case '\'': { - clearLongStrBuf(); + clearStrBuf(); state = P::transition(mViewSource, NS_HTML5TOKENIZER_DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED, reconsume, pos); NS_HTML5_CONTINUE(stateloop); } @@ -2974,7 +2965,7 @@ nsHtml5Tokenizer::stateLoop(int32_t state, char16_t c, int32_t pos, char16_t* bu c = checkChar(buf, pos); switch(c) { case '\"': { - publicIdentifier = longStrBufToString(); + publicIdentifier = strBufToString(); state = P::transition(mViewSource, NS_HTML5TOKENIZER_AFTER_DOCTYPE_PUBLIC_IDENTIFIER, reconsume, pos); NS_HTML5_BREAK(doctypepublicidentifierdoublequotedloop); } @@ -2983,24 +2974,24 @@ nsHtml5Tokenizer::stateLoop(int32_t state, char16_t c, int32_t pos, char16_t* bu errGtInPublicId(); } forceQuirks = true; - publicIdentifier = longStrBufToString(); + publicIdentifier = strBufToString(); emitDoctypeToken(pos); state = P::transition(mViewSource, NS_HTML5TOKENIZER_DATA, reconsume, pos); NS_HTML5_CONTINUE(stateloop); } case '\r': { - appendLongStrBufCarriageReturn(); + appendStrBufCarriageReturn(); NS_HTML5_BREAK(stateloop); } case '\n': { - appendLongStrBufLineFeed(); + appendStrBufLineFeed(); continue; } case '\0': { c = 0xfffd; } default: { - appendLongStrBuf(c); + appendStrBuf(c); continue; } } @@ -3037,7 +3028,7 @@ nsHtml5Tokenizer::stateLoop(int32_t state, char16_t c, int32_t pos, char16_t* bu if (P::reportErrors) { errNoSpaceBetweenPublicAndSystemIds(); } - clearLongStrBuf(); + clearStrBuf(); state = P::transition(mViewSource, NS_HTML5TOKENIZER_DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED, reconsume, pos); NS_HTML5_CONTINUE(stateloop); } @@ -3045,7 +3036,7 @@ nsHtml5Tokenizer::stateLoop(int32_t state, char16_t c, int32_t pos, char16_t* bu if (P::reportErrors) { errNoSpaceBetweenPublicAndSystemIds(); } - clearLongStrBuf(); + clearStrBuf(); state = 
P::transition(mViewSource, NS_HTML5TOKENIZER_DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED, reconsume, pos); NS_HTML5_CONTINUE(stateloop); } @@ -3083,12 +3074,12 @@ nsHtml5Tokenizer::stateLoop(int32_t state, char16_t c, int32_t pos, char16_t* bu NS_HTML5_CONTINUE(stateloop); } case '\"': { - clearLongStrBuf(); + clearStrBuf(); state = P::transition(mViewSource, NS_HTML5TOKENIZER_DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED, reconsume, pos); NS_HTML5_BREAK(betweendoctypepublicandsystemidentifiersloop); } case '\'': { - clearLongStrBuf(); + clearStrBuf(); state = P::transition(mViewSource, NS_HTML5TOKENIZER_DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED, reconsume, pos); NS_HTML5_CONTINUE(stateloop); } @@ -3109,7 +3100,7 @@ nsHtml5Tokenizer::stateLoop(int32_t state, char16_t c, int32_t pos, char16_t* bu c = checkChar(buf, pos); switch(c) { case '\"': { - systemIdentifier = longStrBufToString(); + systemIdentifier = strBufToString(); state = P::transition(mViewSource, NS_HTML5TOKENIZER_AFTER_DOCTYPE_SYSTEM_IDENTIFIER, reconsume, pos); NS_HTML5_CONTINUE(stateloop); } @@ -3118,24 +3109,24 @@ nsHtml5Tokenizer::stateLoop(int32_t state, char16_t c, int32_t pos, char16_t* bu errGtInSystemId(); } forceQuirks = true; - systemIdentifier = longStrBufToString(); + systemIdentifier = strBufToString(); emitDoctypeToken(pos); state = P::transition(mViewSource, NS_HTML5TOKENIZER_DATA, reconsume, pos); NS_HTML5_CONTINUE(stateloop); } case '\r': { - appendLongStrBufCarriageReturn(); + appendStrBufCarriageReturn(); NS_HTML5_BREAK(stateloop); } case '\n': { - appendLongStrBufLineFeed(); + appendStrBufLineFeed(); continue; } case '\0': { c = 0xfffd; } default: { - appendLongStrBuf(c); + appendStrBuf(c); continue; } } @@ -3260,7 +3251,7 @@ nsHtml5Tokenizer::stateLoop(int32_t state, char16_t c, int32_t pos, char16_t* bu if (P::reportErrors) { errNoSpaceBetweenDoctypeSystemKeywordAndQuote(); } - clearLongStrBuf(); + clearStrBuf(); state = P::transition(mViewSource, 
NS_HTML5TOKENIZER_DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED, reconsume, pos); NS_HTML5_CONTINUE(stateloop); } @@ -3268,7 +3259,7 @@ nsHtml5Tokenizer::stateLoop(int32_t state, char16_t c, int32_t pos, char16_t* bu if (P::reportErrors) { errNoSpaceBetweenDoctypeSystemKeywordAndQuote(); } - clearLongStrBuf(); + clearStrBuf(); state = P::transition(mViewSource, NS_HTML5TOKENIZER_DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED, reconsume, pos); NS_HTML5_CONTINUE(stateloop); } @@ -3310,12 +3301,12 @@ nsHtml5Tokenizer::stateLoop(int32_t state, char16_t c, int32_t pos, char16_t* bu continue; } case '\"': { - clearLongStrBuf(); + clearStrBuf(); state = P::transition(mViewSource, NS_HTML5TOKENIZER_DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED, reconsume, pos); NS_HTML5_CONTINUE(stateloop); } case '\'': { - clearLongStrBuf(); + clearStrBuf(); state = P::transition(mViewSource, NS_HTML5TOKENIZER_DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED, reconsume, pos); NS_HTML5_BREAK(beforedoctypesystemidentifierloop); } @@ -3345,7 +3336,7 @@ nsHtml5Tokenizer::stateLoop(int32_t state, char16_t c, int32_t pos, char16_t* bu c = checkChar(buf, pos); switch(c) { case '\'': { - systemIdentifier = longStrBufToString(); + systemIdentifier = strBufToString(); state = P::transition(mViewSource, NS_HTML5TOKENIZER_AFTER_DOCTYPE_SYSTEM_IDENTIFIER, reconsume, pos); NS_HTML5_CONTINUE(stateloop); } @@ -3354,24 +3345,24 @@ nsHtml5Tokenizer::stateLoop(int32_t state, char16_t c, int32_t pos, char16_t* bu errGtInSystemId(); } forceQuirks = true; - systemIdentifier = longStrBufToString(); + systemIdentifier = strBufToString(); emitDoctypeToken(pos); state = P::transition(mViewSource, NS_HTML5TOKENIZER_DATA, reconsume, pos); NS_HTML5_CONTINUE(stateloop); } case '\r': { - appendLongStrBufCarriageReturn(); + appendStrBufCarriageReturn(); NS_HTML5_BREAK(stateloop); } case '\n': { - appendLongStrBufLineFeed(); + appendStrBufLineFeed(); continue; } case '\0': { c = 0xfffd; } default: { - appendLongStrBuf(c); + appendStrBuf(c); 
continue; } } @@ -3385,7 +3376,7 @@ nsHtml5Tokenizer::stateLoop(int32_t state, char16_t c, int32_t pos, char16_t* bu c = checkChar(buf, pos); switch(c) { case '\'': { - publicIdentifier = longStrBufToString(); + publicIdentifier = strBufToString(); state = P::transition(mViewSource, NS_HTML5TOKENIZER_AFTER_DOCTYPE_PUBLIC_IDENTIFIER, reconsume, pos); NS_HTML5_CONTINUE(stateloop); } @@ -3394,24 +3385,24 @@ nsHtml5Tokenizer::stateLoop(int32_t state, char16_t c, int32_t pos, char16_t* bu errGtInPublicId(); } forceQuirks = true; - publicIdentifier = longStrBufToString(); + publicIdentifier = strBufToString(); emitDoctypeToken(pos); state = P::transition(mViewSource, NS_HTML5TOKENIZER_DATA, reconsume, pos); NS_HTML5_CONTINUE(stateloop); } case '\r': { - appendLongStrBufCarriageReturn(); + appendStrBufCarriageReturn(); NS_HTML5_BREAK(stateloop); } case '\n': { - appendLongStrBufLineFeed(); + appendStrBufLineFeed(); continue; } case '\0': { c = 0xfffd; } default: { - appendLongStrBuf(c); + appendStrBuf(c); continue; } } @@ -3520,16 +3511,6 @@ nsHtml5Tokenizer::bogusDoctypeWithoutQuirks() forceQuirks = false; } -void -nsHtml5Tokenizer::emitOrAppendStrBuf(int32_t returnState) -{ - if ((returnState & NS_HTML5TOKENIZER_DATA_AND_RCDATA_MASK)) { - appendStrBufToLongStrBuf(); - } else { - emitStrBuf(); - } -} - void nsHtml5Tokenizer::handleNcrValue(int32_t returnState) { @@ -3625,7 +3606,7 @@ nsHtml5Tokenizer::eof() } case NS_HTML5TOKENIZER_MARKUP_DECLARATION_OPEN: { errBogusComment(); - clearLongStrBuf(); + clearStrBuf(); emitComment(0, 0); NS_HTML5_BREAK(eofloop); } @@ -3706,7 +3687,7 @@ nsHtml5Tokenizer::eof() case NS_HTML5TOKENIZER_DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED: { errEofInPublicId(); forceQuirks = true; - publicIdentifier = longStrBufToString(); + publicIdentifier = strBufToString(); emitDoctypeToken(0); NS_HTML5_BREAK(eofloop); } @@ -3722,7 +3703,7 @@ nsHtml5Tokenizer::eof() case NS_HTML5TOKENIZER_DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED: { errEofInSystemId(); 
forceQuirks = true; - systemIdentifier = longStrBufToString(); + systemIdentifier = strBufToString(); emitDoctypeToken(0); NS_HTML5_BREAK(eofloop); } @@ -3737,13 +3718,13 @@ nsHtml5Tokenizer::eof() NS_HTML5_BREAK(eofloop); } case NS_HTML5TOKENIZER_CONSUME_CHARACTER_REFERENCE: { - emitOrAppendStrBuf(returnState); + emitOrAppendCharRefBuf(returnState); state = returnState; continue; } case NS_HTML5TOKENIZER_CHARACTER_REFERENCE_HILO_LOOKUP: { errNoNamedCharacterMatch(); - emitOrAppendStrBuf(returnState); + emitOrAppendCharRefBuf(returnState); state = returnState; continue; } @@ -3773,7 +3754,7 @@ nsHtml5Tokenizer::eof() } if (entCol == nsHtml5NamedCharacters::NAMES[lo].length()) { candidate = lo; - strBufMark = strBufLen; + charRefBufMark = charRefBufLen; lo++; } else if (entCol > nsHtml5NamedCharacters::NAMES[lo].length()) { NS_HTML5_BREAK(outer); @@ -3792,7 +3773,7 @@ nsHtml5Tokenizer::eof() outer_end: ; if (candidate == -1) { errNoNamedCharacterMatch(); - emitOrAppendStrBuf(returnState); + emitOrAppendCharRefBuf(returnState); state = returnState; NS_HTML5_CONTINUE(eofloop); } else { @@ -3800,14 +3781,14 @@ nsHtml5Tokenizer::eof() if (!candidateName.length() || candidateName.charAt(candidateName.length() - 1) != ';') { if ((returnState & NS_HTML5TOKENIZER_DATA_AND_RCDATA_MASK)) { char16_t ch; - if (strBufMark == strBufLen) { + if (charRefBufMark == charRefBufLen) { ch = '\0'; } else { - ch = strBuf[strBufMark]; + ch = charRefBuf[charRefBufMark]; } if ((ch >= '0' && ch <= '9') || (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z')) { errNoNamedCharacterMatch(); - appendStrBufToLongStrBuf(); + appendCharRefBufToStrBuf(); state = returnState; NS_HTML5_CONTINUE(eofloop); } @@ -3824,13 +3805,11 @@ nsHtml5Tokenizer::eof() } else { emitOrAppendTwo(val, returnState); } - if (strBufMark < strBufLen) { + if (charRefBufMark < charRefBufLen) { if ((returnState & NS_HTML5TOKENIZER_DATA_AND_RCDATA_MASK)) { - for (int32_t i = strBufMark; i < strBufLen; i++) { - 
appendLongStrBuf(strBuf[i]); - } + appendStrBuf(charRefBuf, charRefBufMark, charRefBufLen - charRefBufMark); } else { - tokenHandler->characters(strBuf, strBufMark, strBufLen - strBufMark); + tokenHandler->characters(charRefBuf, charRefBufMark, charRefBufLen - charRefBufMark); } } state = returnState; @@ -3842,7 +3821,7 @@ nsHtml5Tokenizer::eof() case NS_HTML5TOKENIZER_HEX_NCR_LOOP: { if (!seenDigits) { errNoDigitsInNCR(); - emitOrAppendStrBuf(returnState); + emitOrAppendCharRefBuf(returnState); state = returnState; continue; } else { @@ -3896,8 +3875,8 @@ void nsHtml5Tokenizer::emitOrAppendTwo(const char16_t* val, int32_t returnState) { if ((returnState & NS_HTML5TOKENIZER_DATA_AND_RCDATA_MASK)) { - appendLongStrBuf(val[0]); - appendLongStrBuf(val[1]); + appendStrBuf(val[0]); + appendStrBuf(val[1]); } else { tokenHandler->characters(val, 0, 2); } @@ -3907,7 +3886,7 @@ void nsHtml5Tokenizer::emitOrAppendOne(const char16_t* val, int32_t returnState) { if ((returnState & NS_HTML5TOKENIZER_DATA_AND_RCDATA_MASK)) { - appendLongStrBuf(val[0]); + appendStrBuf(val[0]); } else { tokenHandler->characters(val, 0, 1); } @@ -3917,7 +3896,6 @@ void nsHtml5Tokenizer::end() { strBuf = nullptr; - longStrBuf = nullptr; doctypeName = nullptr; if (systemIdentifier) { nsHtml5Portability::releaseString(systemIdentifier); @@ -3957,7 +3935,7 @@ void nsHtml5Tokenizer::resetToDataState() { strBufLen = 0; - longStrBufLen = 0; + charRefBufLen = 0; stateSave = NS_HTML5TOKENIZER_DATA; lastCR = false; index = 0; @@ -3968,8 +3946,7 @@ nsHtml5Tokenizer::resetToDataState() lo = 0; hi = 0; candidate = -1; - strBufMark = 0; - prevValue = -1; + charRefBufMark = 0; value = 0; seenDigits = false; endTag = false; @@ -3999,11 +3976,8 @@ nsHtml5Tokenizer::loadState(nsHtml5Tokenizer* other) strBuf = jArray::newJArray(strBufLen); } nsHtml5ArrayCopy::arraycopy(other->strBuf, strBuf, strBufLen); - longStrBufLen = other->longStrBufLen; - if (longStrBufLen > longStrBuf.length) { - longStrBuf = 
jArray::newJArray(longStrBufLen); - } - nsHtml5ArrayCopy::arraycopy(other->longStrBuf, longStrBuf, longStrBufLen); + charRefBufLen = other->charRefBufLen; + nsHtml5ArrayCopy::arraycopy(other->charRefBuf, charRefBuf, charRefBufLen); stateSave = other->stateSave; returnStateSave = other->returnStateSave; endTagExpectation = other->endTagExpectation; @@ -4017,8 +3991,7 @@ nsHtml5Tokenizer::loadState(nsHtml5Tokenizer* other) lo = other->lo; hi = other->hi; candidate = other->candidate; - strBufMark = other->strBufMark; - prevValue = other->prevValue; + charRefBufMark = other->charRefBufMark; value = other->value; seenDigits = other->seenDigits; endTag = other->endTag; @@ -4068,8 +4041,7 @@ void nsHtml5Tokenizer::initializeWithoutStarting() { confident = false; - strBuf = jArray::newJArray(64); - longStrBuf = jArray::newJArray(1024); + strBuf = nullptr; line = 1; resetToDataState(); } diff --git a/parser/html/nsHtml5Tokenizer.h b/parser/html/nsHtml5Tokenizer.h index c09922ea6e..c969b5ac73 100644 --- a/parser/html/nsHtml5Tokenizer.h +++ b/parser/html/nsHtml5Tokenizer.h @@ -98,8 +98,7 @@ class nsHtml5Tokenizer int32_t lo; int32_t hi; int32_t candidate; - int32_t strBufMark; - int32_t prevValue; + int32_t charRefBufMark; protected: int32_t value; private: @@ -111,8 +110,8 @@ class nsHtml5Tokenizer nsString* systemId; autoJArray strBuf; int32_t strBufLen; - autoJArray longStrBuf; - int32_t longStrBufLen; + autoJArray charRefBuf; + int32_t charRefBufLen; autoJArray bmpChar; autoJArray astralChar; protected: @@ -156,6 +155,18 @@ class nsHtml5Tokenizer nsHtml5HtmlAttributes* emptyAttributes(); private: + inline void appendCharRefBuf(char16_t c) + { + charRefBuf[charRefBufLen++] = c; + } + + inline void clearCharRefBufAndAppend(char16_t c) + { + charRefBuf[0] = c; + charRefBufLen = 1; + } + + void emitOrAppendCharRefBuf(int32_t returnState); inline void clearStrBufAndAppend(char16_t c) { strBuf[0] = c; @@ -173,36 +184,23 @@ class nsHtml5Tokenizer private: void 
strBufToDoctypeName(); void emitStrBuf(); - inline void clearLongStrBuf() - { - longStrBufLen = 0; - } - - inline void clearLongStrBufAndAppend(char16_t c) - { - longStrBuf[0] = c; - longStrBufLen = 1; - } - - void appendLongStrBuf(char16_t c); inline void appendSecondHyphenToBogusComment() { - appendLongStrBuf('-'); + appendStrBuf('-'); } - inline void adjustDoubleHyphenAndAppendToLongStrBufAndErr(char16_t c) + inline void adjustDoubleHyphenAndAppendToStrBufAndErr(char16_t c) { errConsecutiveHyphens(); - appendLongStrBuf(c); + appendStrBuf(c); } - void appendLongStrBuf(char16_t* buffer, int32_t offset, int32_t length); - inline void appendStrBufToLongStrBuf() + void appendStrBuf(char16_t* buffer, int32_t offset, int32_t length); + inline void appendCharRefBufToStrBuf() { - appendLongStrBuf(strBuf, 0, strBufLen); + appendStrBuf(charRefBuf, 0, charRefBufLen); } - nsString* longStrBufToString(); void emitComment(int32_t provisionalHyphens, int32_t pos); protected: void flushChars(char16_t* buf, int32_t pos); @@ -218,28 +216,28 @@ class nsHtml5Tokenizer private: template int32_t stateLoop(int32_t state, char16_t c, int32_t pos, char16_t* buf, bool reconsume, int32_t returnState, int32_t endPos); void initDoctypeFields(); - inline void adjustDoubleHyphenAndAppendToLongStrBufCarriageReturn() + inline void adjustDoubleHyphenAndAppendToStrBufCarriageReturn() { silentCarriageReturn(); - adjustDoubleHyphenAndAppendToLongStrBufAndErr('\n'); + adjustDoubleHyphenAndAppendToStrBufAndErr('\n'); } - inline void adjustDoubleHyphenAndAppendToLongStrBufLineFeed() + inline void adjustDoubleHyphenAndAppendToStrBufLineFeed() { silentLineFeed(); - adjustDoubleHyphenAndAppendToLongStrBufAndErr('\n'); + adjustDoubleHyphenAndAppendToStrBufAndErr('\n'); } - inline void appendLongStrBufLineFeed() + inline void appendStrBufLineFeed() { silentLineFeed(); - appendLongStrBuf('\n'); + appendStrBuf('\n'); } - inline void appendLongStrBufCarriageReturn() + inline void appendStrBufCarriageReturn() { 
silentCarriageReturn(); - appendLongStrBuf('\n'); + appendStrBuf('\n'); } protected: @@ -261,7 +259,6 @@ class nsHtml5Tokenizer void setAdditionalAndRememberAmpersandLocation(char16_t add); void bogusDoctype(); void bogusDoctypeWithoutQuirks(); - void emitOrAppendStrBuf(int32_t returnState); void handleNcrValue(int32_t returnState); public: void eof(); @@ -370,7 +367,6 @@ class nsHtml5Tokenizer #define NS_HTML5TOKENIZER_PROCESSING_INSTRUCTION 73 #define NS_HTML5TOKENIZER_PROCESSING_INSTRUCTION_QUESTION_MARK 74 #define NS_HTML5TOKENIZER_LEAD_OFFSET (0xD800 - (0x10000 >> 10)) -#define NS_HTML5TOKENIZER_BUFFER_GROW_BY 1024 #endif diff --git a/parser/html/nsHtml5TokenizerCppSupplement.h b/parser/html/nsHtml5TokenizerCppSupplement.h index 5d30b5c8e8..c02d936c21 100644 --- a/parser/html/nsHtml5TokenizerCppSupplement.h +++ b/parser/html/nsHtml5TokenizerCppSupplement.h @@ -4,6 +4,44 @@ #include "mozilla/Likely.h" +bool +nsHtml5Tokenizer::EnsureBufferSpace(int32_t aLength) +{ + MOZ_ASSERT(aLength >= 0, "Negative length."); + // Add 2 to account for emissions of LT_GT, LT_SOLIDUS and RSQB_RSQB. + // Adding to the general worst case instead of only the + // TreeBuilder-exposed worst case to avoid re-introducing a bug when + // unifying the tokenizer and tree builder buffers in the future. + size_t worstCase = size_t(strBufLen) + + size_t(aLength) + + size_t(charRefBufLen) + + size_t(2); + if (worstCase > INT32_MAX) { + // Since we index into the buffer using int32_t due to the Java heritage + // of the code, let's treat this as OOM. + return false; + } + // TODO: Unify nsHtml5Tokenizer::strBuf and nsHtml5TreeBuilder::charBuffer + // so that the call below becomes unnecessary. + tokenHandler->EnsureBufferSpace(worstCase); + if (!strBuf) { + // Add one to round to the next power of two to avoid immediate + // reallocation once there are a few characters in the buffer. 
+ strBuf = jArray::newFallibleJArray(mozilla::RoundUpPow2(worstCase + 1)); + if (!strBuf) { + return false; + } + } else if (worstCase > size_t(strBuf.length)) { + jArray newBuf = jArray::newFallibleJArray(mozilla::RoundUpPow2(worstCase)); + if (!newBuf) { + return false; + } + memcpy(newBuf,strBuf, sizeof(char16_t) * strBufLen); + strBuf = newBuf; + } + return true; +} + void nsHtml5Tokenizer::StartPlainText() { diff --git a/parser/html/nsHtml5TokenizerHSupplement.h b/parser/html/nsHtml5TokenizerHSupplement.h index 0dcaef1168..a899feec9f 100644 --- a/parser/html/nsHtml5TokenizerHSupplement.h +++ b/parser/html/nsHtml5TokenizerHSupplement.h @@ -7,6 +7,16 @@ inline nsHtml5HtmlAttributes* GetAttributes() return attributes; } +/** + * Makes sure the buffers are large enough to be able to tokenize aLength + * UTF-16 code units before having to make the buffers larger. + * + * @param aLength the number of UTF-16 code units to be tokenized before the + * next call to this method. + * @return true if successful; false if out of memory + */ +bool EnsureBufferSpace(int32_t aLength); + nsAutoPtr mViewSource; /** diff --git a/parser/html/nsHtml5TreeBuilder.cpp b/parser/html/nsHtml5TreeBuilder.cpp index c14544717e..0de99b4c41 100644 --- a/parser/html/nsHtml5TreeBuilder.cpp +++ b/parser/html/nsHtml5TreeBuilder.cpp @@ -88,7 +88,7 @@ nsHtml5TreeBuilder::startTokenization(nsHtml5Tokenizer* self) deepTreeSurrogateParent = nullptr; start(fragment); charBufferLen = 0; - charBuffer = jArray::newJArray(1024); + charBuffer = nullptr; framesetOk = true; if (fragment) { nsIContentHandle* elt; diff --git a/parser/html/nsHtml5TreeBuilderCppSupplement.h b/parser/html/nsHtml5TreeBuilderCppSupplement.h index 6e16a2db12..57ffc5ddc0 100644 --- a/parser/html/nsHtml5TreeBuilderCppSupplement.h +++ b/parser/html/nsHtml5TreeBuilderCppSupplement.h @@ -24,6 +24,7 @@ nsHtml5TreeBuilder::nsHtml5TreeBuilder(nsHtml5OplessBuilder* aBuilder) , mHandles(nullptr) , mHandlesUsed(0) , 
mSpeculativeLoadStage(nullptr) + , mBroken(NS_OK) , mCurrentHtmlScriptIsAsyncOrDefer(false) , mPreventScriptExecution(false) #ifdef DEBUG @@ -48,6 +49,7 @@ nsHtml5TreeBuilder::nsHtml5TreeBuilder(nsAHtml5TreeOpSink* aOpSink, , mHandles(new nsIContent*[NS_HTML5_TREE_BUILDER_HANDLE_ARRAY_LENGTH]) , mHandlesUsed(0) , mSpeculativeLoadStage(aStage) + , mBroken(NS_OK) , mCurrentHtmlScriptIsAsyncOrDefer(false) , mPreventScriptExecution(false) #ifdef DEBUG @@ -912,15 +914,39 @@ nsHtml5TreeBuilder::elementPopped(int32_t aNamespace, nsIAtom* aName, nsIContent void nsHtml5TreeBuilder::accumulateCharacters(const char16_t* aBuf, int32_t aStart, int32_t aLength) { - int32_t newFillLen = charBufferLen + aLength; - if (newFillLen > charBuffer.length) { - int32_t newAllocLength = newFillLen + (newFillLen >> 1); - jArray newBuf = jArray::newJArray(newAllocLength); + MOZ_ASSERT(charBufferLen + aLength <= charBuffer.length, + "About to memcpy past the end of the buffer!"); + memcpy(charBuffer + charBufferLen, aBuf + aStart, sizeof(char16_t) * aLength); + charBufferLen += aLength; +} + +bool +nsHtml5TreeBuilder::EnsureBufferSpace(size_t aLength) +{ + // TODO: Unify nsHtml5Tokenizer::strBuf and nsHtml5TreeBuilder::charBuffer + // so that this method becomes unnecessary. + size_t worstCase = size_t(charBufferLen) + aLength; + if (worstCase > INT32_MAX) { + // Since we index into the buffer using int32_t due to the Java heritage + // of the code, let's treat this as OOM. + return false; + } + if (!charBuffer) { + // Add one to round to the next power of two to avoid immediate + // reallocation once there are a few characters in the buffer. 
+ charBuffer = jArray::newFallibleJArray(mozilla::RoundUpPow2(worstCase + 1)); + if (!charBuffer) { + return false; + } + } else if (worstCase > size_t(charBuffer.length)) { + jArray newBuf = jArray::newFallibleJArray(mozilla::RoundUpPow2(worstCase)); + if (!newBuf) { + return false; + } memcpy(newBuf, charBuffer, sizeof(char16_t) * charBufferLen); charBuffer = newBuf; } - memcpy(charBuffer + charBufferLen, aBuf + aStart, sizeof(char16_t) * aLength); - charBufferLen = newFillLen; + return true; } nsIContentHandle* @@ -1074,6 +1100,7 @@ nsHtml5TreeBuilder::MarkAsBroken(nsresult aRv) MOZ_ASSERT_UNREACHABLE("Must not call this with builder."); return; } + mBroken = aRv; mOpQueue.Clear(); // Previous ops don't matter anymore mOpQueue.AppendElement()->Init(aRv); } diff --git a/parser/html/nsHtml5TreeBuilderHSupplement.h b/parser/html/nsHtml5TreeBuilderHSupplement.h index c47fe56267..944cfc53c9 100644 --- a/parser/html/nsHtml5TreeBuilderHSupplement.h +++ b/parser/html/nsHtml5TreeBuilderHSupplement.h @@ -18,6 +18,7 @@ int32_t mHandlesUsed; nsTArray > mOldHandles; nsHtml5TreeOpStage* mSpeculativeLoadStage; + nsresult mBroken; bool mCurrentHtmlScriptIsAsyncOrDefer; bool mPreventScriptExecution; #ifdef DEBUG @@ -127,6 +128,16 @@ return mBuilder; } + /** + * Makes sure the buffers are large enough to be able to tokenize aLength + * UTF-16 code units before having to make the buffers larger. + * + * @param aLength the number of UTF-16 code units to be tokenized before the + * next call to this method. + * @return true if successful; false if out of memory + */ + bool EnsureBufferSpace(size_t aLength); + void EnableViewSource(nsHtml5Highlighter* aHighlighter); void errStrayStartTag(nsIAtom* aName); @@ -224,3 +235,12 @@ void errEndWithUnclosedElements(nsIAtom* aName); void MarkAsBroken(nsresult aRv); + + /** + * Checks if this parser is broken. Returns a non-NS_OK (i.e. non-0) + * value if broken. 
+ */ + nsresult IsBroken() + { + return mBroken; + } diff --git a/parser/html/nsHtml5UTF16Buffer.cpp b/parser/html/nsHtml5UTF16Buffer.cpp index e3c0140fda..f70365ce44 100644 --- a/parser/html/nsHtml5UTF16Buffer.cpp +++ b/parser/html/nsHtml5UTF16Buffer.cpp @@ -84,6 +84,12 @@ nsHtml5UTF16Buffer::hasMore() return start < end; } +int32_t +nsHtml5UTF16Buffer::getLength() +{ + return end - start; +} + void nsHtml5UTF16Buffer::adjust(bool lastWasCR) { diff --git a/parser/html/nsHtml5UTF16Buffer.h b/parser/html/nsHtml5UTF16Buffer.h index 6469d6d277..cf810e124c 100644 --- a/parser/html/nsHtml5UTF16Buffer.h +++ b/parser/html/nsHtml5UTF16Buffer.h @@ -67,6 +67,7 @@ class nsHtml5UTF16Buffer char16_t* getBuffer(); int32_t getEnd(); bool hasMore(); + int32_t getLength(); void adjust(bool lastWasCR); void setEnd(int32_t end); static void initializeStatics(); diff --git a/parser/htmlparser/tests/mochitest/html5lib_tree_construction/entities01.dat b/parser/htmlparser/tests/mochitest/html5lib_tree_construction/entities01.dat index 20d53a0fd5..b271f8220f 100644 --- a/parser/htmlparser/tests/mochitest/html5lib_tree_construction/entities01.dat +++ b/parser/htmlparser/tests/mochitest/html5lib_tree_construction/entities01.dat @@ -721,3 +721,72 @@ FOO�ZOO | | | "FOO�ZOO" + +#data +FOO� +#errors +(1,3): expected-doctype-but-got-chars +(1,13): illegal-codepoint-for-numeric-entity +(1,13): eof-in-numeric-entity +#document +| +| +| +| "FOO�" + +#data +FOO� +#errors +(1,3): expected-doctype-but-got-chars +(1,13): illegal-codepoint-for-numeric-entity +(1,13): eof-in-numeric-entity +#document +| +| +| +| "FOO�" + +#data +FOO� +#errors +(1,3): expected-doctype-but-got-chars +(1,13): illegal-codepoint-for-numeric-entity +(1,13): eof-in-numeric-entity +#document +| +| +| +| "FOO�" + +#data +FOO�ZOO +#errors +(1,3): expected-doctype-but-got-chars +(1,13): illegal-codepoint-for-numeric-entity +#document +| +| +| +| "FOO�ZOO" + +#data +FOO�ZOO +#errors +(1,3): expected-doctype-but-got-chars +(1,13): 
illegal-codepoint-for-numeric-entity +#document +| +| +| +| "FOO�ZOO" + +#data +FOO�ZOO +#errors +(1,3): expected-doctype-but-got-chars +(1,13): illegal-codepoint-for-numeric-entity +#document +| +| +| +| "FOO�ZOO" diff --git a/xpcom/base/nsCycleCollector.cpp b/xpcom/base/nsCycleCollector.cpp index eb7dad5f79..51bec6e865 100644 --- a/xpcom/base/nsCycleCollector.cpp +++ b/xpcom/base/nsCycleCollector.cpp @@ -1395,24 +1395,6 @@ static mozilla::ThreadLocal sCollectorData; // Utility functions //////////////////////////////////////////////////////////////////////// -MOZ_NEVER_INLINE static void -Fault(const char* aMsg, const void* aPtr = nullptr) -{ - if (aPtr) { - printf("Fault in cycle collector: %s (ptr: %p)\n", aMsg, aPtr); - } else { - printf("Fault in cycle collector: %s\n", aMsg); - } - - NS_RUNTIMEABORT("cycle collector fault"); -} - -static void -Fault(const char* aMsg, PtrInfo* aPi) -{ - Fault(aMsg, aPi->mPointer); -} - static inline void ToParticipant(nsISupports* aPtr, nsXPCOMCycleCollectionParticipant** aCp) { @@ -2255,9 +2237,7 @@ CCGraphBuilder::BuildGraph(SliceBudget& aBudget) if (pi->mParticipant) { nsresult rv = pi->mParticipant->Traverse(pi->mPointer, *this); - if (NS_FAILED(rv)) { - Fault("script pointer traversal failed", pi); - } + MOZ_RELEASE_ASSERT(!NS_FAILED(rv), "Cycle collector Traverse method failed"); } if (mCurrNode->AtBlockEnd()) { @@ -2313,12 +2293,9 @@ CCGraphBuilder::NoteNativeRoot(void* aRoot, NS_IMETHODIMP_(void) CCGraphBuilder::DescribeRefCountedNode(nsrefcnt aRefCount, const char* aObjName) { - if (aRefCount == 0) { - Fault("zero refcount", mCurrPi); - } - if (aRefCount == UINT32_MAX) { - Fault("overflowing refcount", mCurrPi); - } + MOZ_RELEASE_ASSERT(aRefCount != 0, "CCed refcounted object has zero refcount"); + MOZ_RELEASE_ASSERT(aRefCount != UINT32_MAX, "CCed refcounted object has overflowing refcount"); + mResults.mVisitedRefCounted++; if (mListener) { @@ -3135,12 +3112,10 @@ nsCycleCollector::ScanWhiteNodes(bool 
aFullySynchGraphBuild) continue; } - if (MOZ_LIKELY(pi->mInternalRefs < pi->mRefCount)) { - // This node will get marked black in the next pass. - continue; - } + MOZ_RELEASE_ASSERT(pi->mInternalRefs < pi->mRefCount, + "Cycle collector found more references to an object than its refcount"); - Fault("Traversed refs exceed refcount", pi); + // This node will get marked black in the next pass. } } @@ -3405,10 +3380,7 @@ nsCycleCollector::~nsCycleCollector() void nsCycleCollector::RegisterJSRuntime(CycleCollectedJSRuntime* aJSRuntime) { - if (mJSRuntime) { - Fault("multiple registrations of cycle collector JS runtime", aJSRuntime); - } - + MOZ_RELEASE_ASSERT(!mJSRuntime, "Multiple registrations of JS runtime in cycle collector"); mJSRuntime = aJSRuntime; // We can't register as a reporter in nsCycleCollector() because that runs @@ -3424,10 +3396,7 @@ nsCycleCollector::RegisterJSRuntime(CycleCollectedJSRuntime* aJSRuntime) void nsCycleCollector::ForgetJSRuntime() { - if (!mJSRuntime) { - Fault("forgetting non-registered cycle collector JS runtime"); - } - + MOZ_RELEASE_ASSERT(mJSRuntime, "Forgetting JS runtime in cycle collector before a JS runtime was registered"); mJSRuntime = nullptr; } @@ -3451,9 +3420,9 @@ nsCycleCollector::Suspect(void* aPtr, nsCycleCollectionParticipant* aParti, { CheckThreadSafety(); - // Re-entering ::Suspect during collection used to be a fault, but - // we are canonicalizing nsISupports pointers using QI, so we will - // see some spurious refcount traffic here. + // Re-entering ::Suspect during collection used to be a fatal error, + // but we are canonicalizing nsISupports pointers using QI, so we + // will see some spurious refcount traffic here. 
if (MOZ_UNLIKELY(mScanInProgress)) { return; diff --git a/xpcom/base/nsCycleCollector.h b/xpcom/base/nsCycleCollector.h index e43de375a6..fadd57bec1 100644 --- a/xpcom/base/nsCycleCollector.h +++ b/xpcom/base/nsCycleCollector.h @@ -19,7 +19,7 @@ template struct already_AddRefed; namespace mozilla { class CycleCollectedJSRuntime; -} +} // namespace mozilla bool nsCycleCollector_init(); diff --git a/xpcom/base/nsMemoryReporterManager.cpp b/xpcom/base/nsMemoryReporterManager.cpp index 87bc869e1b..3be0258635 100644 --- a/xpcom/base/nsMemoryReporterManager.cpp +++ b/xpcom/base/nsMemoryReporterManager.cpp @@ -1365,11 +1365,13 @@ nsMemoryReporterManager::~nsMemoryReporterManager() NS_ASSERTION(!mSavedWeakReporters, "failed to restore weak reporters"); } -//#define DEBUG_CHILD_PROCESS_MEMORY_REPORTING 1 +#ifdef MOZ_WIDGET_GONK +#define DEBUG_CHILD_PROCESS_MEMORY_REPORTING 1 +#endif #ifdef DEBUG_CHILD_PROCESS_MEMORY_REPORTING #define MEMORY_REPORTING_LOG(format, ...) \ - fprintf(stderr, "++++ MEMORY REPORTING: " format, ##__VA_ARGS__); + printf_stderr("++++ MEMORY REPORTING: " format, ##__VA_ARGS__); #else #define MEMORY_REPORTING_LOG(...) #endif