From c690e26c67a31a792c755e2033da54f71e8762a8 Mon Sep 17 00:00:00 2001 From: Moonchild Date: Mon, 25 May 2026 13:08:29 +0200 Subject: [PATCH] No issue - Remove ISO-2022-JP from menu, overridability and detector. Removed autodetection of this charset due to unreliability. There are known security issues around charset auto-detection for ISO-2022-JP. Given the usage is around 0.000002% of page loads, and Safari does not support auto-detection of ISO-2022-JP, Chrome also planned to remove support for it to eliminate the security issues. See also https://www.sonarsource.com/blog/encoding-differentials-why-charset-matters/ The encoding is also removed from the menu, both to avoid social engineering hazards and because it is practically unused on the web. --- dom/encoding/EncodingUtils.cpp | 1 + .../universalchardet/src/base/moz.build | 2 - .../src/base/nsEscCharsetProber.cpp | 46 -------------- .../src/base/nsEscCharsetProber.h | 32 ---------- .../universalchardet/src/base/nsEscSM.cpp | 63 ------------------- .../src/base/nsUniversalDetector.cpp | 34 ---------- .../src/base/nsUniversalDetector.h | 4 +- toolkit/modules/CharsetMenu.jsm | 5 +- 8 files changed, 4 insertions(+), 183 deletions(-) delete mode 100644 extensions/universalchardet/src/base/nsEscCharsetProber.cpp delete mode 100644 extensions/universalchardet/src/base/nsEscCharsetProber.h delete mode 100644 extensions/universalchardet/src/base/nsEscSM.cpp diff --git a/dom/encoding/EncodingUtils.cpp b/dom/encoding/EncodingUtils.cpp index 1272e506a5..2f148b719f 100644 --- a/dom/encoding/EncodingUtils.cpp +++ b/dom/encoding/EncodingUtils.cpp @@ -63,6 +63,7 @@ EncodingUtils::IsAsciiCompatible(const nsACString& aPreferredName) aPreferredName.LowerCaseEqualsLiteral("utf-16be") || aPreferredName.LowerCaseEqualsLiteral("utf-16le") || aPreferredName.LowerCaseEqualsLiteral("replacement") || + aPreferredName.LowerCaseEqualsLiteral("iso-2022-jp") || aPreferredName.LowerCaseEqualsLiteral("hz-gb-2312") || aPreferredName.LowerCaseEqualsLiteral("utf-7") || 
aPreferredName.LowerCaseEqualsLiteral("x-imap4-modified-utf7")); diff --git a/extensions/universalchardet/src/base/moz.build b/extensions/universalchardet/src/base/moz.build index 62417f9ef4..c077affd0c 100644 --- a/extensions/universalchardet/src/base/moz.build +++ b/extensions/universalchardet/src/base/moz.build @@ -7,8 +7,6 @@ UNIFIED_SOURCES += [ 'CharDistribution.cpp', 'JpCntx.cpp', 'nsCharSetProber.cpp', - 'nsEscCharsetProber.cpp', - 'nsEscSM.cpp', 'nsEUCJPProber.cpp', 'nsLatin1Prober.cpp', 'nsMBCSGroupProber.cpp', diff --git a/extensions/universalchardet/src/base/nsEscCharsetProber.cpp b/extensions/universalchardet/src/base/nsEscCharsetProber.cpp deleted file mode 100644 index b4fbfeb00b..0000000000 --- a/extensions/universalchardet/src/base/nsEscCharsetProber.cpp +++ /dev/null @@ -1,46 +0,0 @@ -/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ -/* This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ - - -#include "nsEscCharsetProber.h" -#include "nsUniversalDetector.h" - -nsEscCharSetProber::nsEscCharSetProber() -{ - mCodingSM = new nsCodingStateMachine(&ISO2022JPSMModel); - mState = eDetecting; - mDetectedCharset = nullptr; -} - -nsEscCharSetProber::~nsEscCharSetProber(void) -{ -} - -void nsEscCharSetProber::Reset(void) -{ - mState = eDetecting; - mCodingSM->Reset(); - mDetectedCharset = nullptr; -} - -nsProbingState nsEscCharSetProber::HandleData(const char* aBuf, uint32_t aLen) -{ - nsSMState codingState; - uint32_t i; - - for ( i = 0; i < aLen && mState == eDetecting; i++) - { - codingState = mCodingSM->NextState(aBuf[i]); - if (codingState == eItsMe) - { - mState = eFoundIt; - mDetectedCharset = mCodingSM->GetCodingStateMachine(); - return mState; - } - } - - return mState; -} - diff --git a/extensions/universalchardet/src/base/nsEscCharsetProber.h b/extensions/universalchardet/src/base/nsEscCharsetProber.h deleted file mode 100644 index 4507972d05..0000000000 --- a/extensions/universalchardet/src/base/nsEscCharsetProber.h +++ /dev/null @@ -1,32 +0,0 @@ -/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ -/* This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ - -#ifndef nsEscCharSetProber_h__ -#define nsEscCharSetProber_h__ - -#include "nsCharSetProber.h" -#include "nsCodingStateMachine.h" -#include "nsAutoPtr.h" - -class nsEscCharSetProber: public nsCharSetProber { -public: - nsEscCharSetProber(); - virtual ~nsEscCharSetProber(void); - nsProbingState HandleData(const char* aBuf, uint32_t aLen); - const char* GetCharSetName() {return mDetectedCharset;} - nsProbingState GetState(void) {return mState;} - void Reset(void); - float GetConfidence(void){return (float)0.99;} - -protected: - void GetDistribution(uint32_t aCharLen, const char* aStr); - - nsAutoPtr mCodingSM; - nsProbingState mState; - const char * mDetectedCharset; -}; - -#endif /* nsEscCharSetProber_h__ */ - diff --git a/extensions/universalchardet/src/base/nsEscSM.cpp b/extensions/universalchardet/src/base/nsEscSM.cpp deleted file mode 100644 index 77a223fec3..0000000000 --- a/extensions/universalchardet/src/base/nsEscSM.cpp +++ /dev/null @@ -1,63 +0,0 @@ -/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ -/* This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ -#include "nsCodingStateMachine.h" - -static const uint32_t ISO2022JP_cls [ 256 / 8 ] = { -PCK4BITS(2,0,0,0,0,0,0,0), // 00 - 07 -PCK4BITS(0,0,0,0,0,0,2,2), // 08 - 0f -PCK4BITS(0,0,0,0,0,0,0,0), // 10 - 17 -PCK4BITS(0,0,0,1,0,0,0,0), // 18 - 1f -PCK4BITS(0,0,0,0,7,0,0,0), // 20 - 27 -PCK4BITS(3,0,0,0,0,0,0,0), // 28 - 2f -PCK4BITS(0,0,0,0,0,0,0,0), // 30 - 37 -PCK4BITS(0,0,0,0,0,0,0,0), // 38 - 3f -PCK4BITS(6,0,4,0,8,0,0,0), // 40 - 47 -PCK4BITS(0,9,5,0,0,0,0,0), // 48 - 4f -PCK4BITS(0,0,0,0,0,0,0,0), // 50 - 57 -PCK4BITS(0,0,0,0,0,0,0,0), // 58 - 5f -PCK4BITS(0,0,0,0,0,0,0,0), // 60 - 67 -PCK4BITS(0,0,0,0,0,0,0,0), // 68 - 6f -PCK4BITS(0,0,0,0,0,0,0,0), // 70 - 77 -PCK4BITS(0,0,0,0,0,0,0,0), // 78 - 7f -PCK4BITS(2,2,2,2,2,2,2,2), // 80 - 87 -PCK4BITS(2,2,2,2,2,2,2,2), // 88 - 8f -PCK4BITS(2,2,2,2,2,2,2,2), // 90 - 97 -PCK4BITS(2,2,2,2,2,2,2,2), // 98 - 9f -PCK4BITS(2,2,2,2,2,2,2,2), // a0 - a7 -PCK4BITS(2,2,2,2,2,2,2,2), // a8 - af -PCK4BITS(2,2,2,2,2,2,2,2), // b0 - b7 -PCK4BITS(2,2,2,2,2,2,2,2), // b8 - bf -PCK4BITS(2,2,2,2,2,2,2,2), // c0 - c7 -PCK4BITS(2,2,2,2,2,2,2,2), // c8 - cf -PCK4BITS(2,2,2,2,2,2,2,2), // d0 - d7 -PCK4BITS(2,2,2,2,2,2,2,2), // d8 - df -PCK4BITS(2,2,2,2,2,2,2,2), // e0 - e7 -PCK4BITS(2,2,2,2,2,2,2,2), // e8 - ef -PCK4BITS(2,2,2,2,2,2,2,2), // f0 - f7 -PCK4BITS(2,2,2,2,2,2,2,2) // f8 - ff -}; - - -static const uint32_t ISO2022JP_st [ 9] = { -PCK4BITS(eStart, 3,eError,eStart,eStart,eStart,eStart,eStart),//00-07 -PCK4BITS(eStart,eStart,eError,eError,eError,eError,eError,eError),//08-0f -PCK4BITS(eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe),//10-17 -PCK4BITS(eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eError,eError),//18-1f -PCK4BITS(eError, 5,eError,eError,eError, 4,eError,eError),//20-27 -PCK4BITS(eError,eError,eError, 6,eItsMe,eError,eItsMe,eError),//28-2f -PCK4BITS(eError,eError,eError,eError,eError,eError,eItsMe,eItsMe),//30-37 -PCK4BITS(eError,eError,eError,eItsMe,eError,eError,eError,eError),//38-3f 
-PCK4BITS(eError,eError,eError,eError,eItsMe,eError,eStart,eStart) //40-47 -}; - -static const uint32_t ISO2022JPCharLenTable[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; - -const SMModel ISO2022JPSMModel = { - {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, ISO2022JP_cls }, - 10, - {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, ISO2022JP_st }, - CHAR_LEN_TABLE(ISO2022JPCharLenTable), - "ISO-2022-JP", -}; diff --git a/extensions/universalchardet/src/base/nsUniversalDetector.cpp b/extensions/universalchardet/src/base/nsUniversalDetector.cpp index d272827b81..1cecfda61c 100644 --- a/extensions/universalchardet/src/base/nsUniversalDetector.cpp +++ b/extensions/universalchardet/src/base/nsUniversalDetector.cpp @@ -8,7 +8,6 @@ #include "nsUniversalDetector.h" #include "nsMBCSGroupProber.h" -#include "nsEscCharsetProber.h" #include "nsLatin1Prober.h" nsUniversalDetector::nsUniversalDetector() @@ -16,7 +15,6 @@ nsUniversalDetector::nsUniversalDetector() mDone = false; mBestGuess = -1; //illegal value as signal mInTag = false; - mEscCharSetProber = nullptr; mStart = true; mDetectedCharset = nullptr; @@ -33,8 +31,6 @@ nsUniversalDetector::~nsUniversalDetector() { for (int32_t i = 0; i < NUM_OF_CHARSET_PROBERS; i++) delete mCharSetProbers[i]; - - delete mEscCharSetProber; } void @@ -50,9 +46,6 @@ nsUniversalDetector::Reset() mInputState = ePureAscii; mLastChar = '\0'; - if (mEscCharSetProber) - mEscCharSetProber->Reset(); - uint32_t i; for (i = 0; i < NUM_OF_CHARSET_PROBERS; i++) if (mCharSetProbers[i]) @@ -117,12 +110,6 @@ nsresult nsUniversalDetector::HandleData(const char* aBuf, uint32_t aLen) //adjust state mInputState = eHighbyte; - //kill mEscCharSetProber if it is active - if (mEscCharSetProber) { - delete mEscCharSetProber; - mEscCharSetProber = nullptr; - } - //start multibyte and singlebyte charset prober if (nullptr == mCharSetProbers[0]) { @@ -140,12 +127,6 @@ nsresult nsUniversalDetector::HandleData(const char* aBuf, uint32_t aLen) } else { - //ok, 
just pure ascii so far - if ((ePureAscii == mInputState) && (aBuf[i] == '\033')) - { - //found escape character - mInputState = eEscAscii; - } mLastChar = aBuf[i]; } } @@ -153,19 +134,6 @@ nsresult nsUniversalDetector::HandleData(const char* aBuf, uint32_t aLen) nsProbingState st; switch (mInputState) { - case eEscAscii: - if (nullptr == mEscCharSetProber) { - mEscCharSetProber = new nsEscCharSetProber(); - if (nullptr == mEscCharSetProber) - return NS_ERROR_OUT_OF_MEMORY; - } - st = mEscCharSetProber->HandleData(aBuf, aLen); - if (st == eFoundIt) - { - mDone = true; - mDetectedCharset = mEscCharSetProber->GetCharSetName(); - } - break; case eHighbyte: for (i = 0; i < NUM_OF_CHARSET_PROBERS; i++) { @@ -231,8 +199,6 @@ void nsUniversalDetector::DataEnd() Report(mCharSetProbers[maxProber]->GetCharSetName()); } break; - case eEscAscii: - break; default: ; } diff --git a/extensions/universalchardet/src/base/nsUniversalDetector.h b/extensions/universalchardet/src/base/nsUniversalDetector.h index 345e74f9f9..d6025fc2dc 100644 --- a/extensions/universalchardet/src/base/nsUniversalDetector.h +++ b/extensions/universalchardet/src/base/nsUniversalDetector.h @@ -12,8 +12,7 @@ class nsCharSetProber; typedef enum { ePureAscii = 0, - eEscAscii = 1, - eHighbyte = 2 + eHighbyte = 1 } nsInputState; class nsUniversalDetector { @@ -37,7 +36,6 @@ protected: uint32_t mLanguageFilter; nsCharSetProber *mCharSetProbers[NUM_OF_CHARSET_PROBERS]; - nsCharSetProber *mEscCharSetProber; }; #endif diff --git a/toolkit/modules/CharsetMenu.jsm b/toolkit/modules/CharsetMenu.jsm index f6479c0248..64e0575278 100644 --- a/toolkit/modules/CharsetMenu.jsm +++ b/toolkit/modules/CharsetMenu.jsm @@ -25,8 +25,7 @@ const kAutoDetectors = [ /** * This set contains encodings that are in the Encoding Standard, except: - * - XSS-dangerous encodings (except ISO-2022-JP which is assumed to be - * too common not to be included). + * - XSS-dangerous encodings. 
* - x-user-defined, which practically never makes sense as an end-user-chosen * override. * - Encodings that IE11 doesn't have in its correspoding menu. @@ -65,7 +64,7 @@ const kEncodings = new Set([ // Japanese "Shift_JIS", "EUC-JP", - "ISO-2022-JP", + // "ISO-2022-JP", // Intentionally not in menu. // Korean "EUC-KR", // Thai