No issue - Remove ISO-2022-JP from menu, overridability and detector.

Removed autodetection of this charset due to unreliability.
There are known security issues around charset auto-detection for ISO-2022-JP.
Given the usage is around 0.000002% of page loads, and Safari does not
support auto-detection of ISO-2022-JP, Chrome also planned to remove
support for it to eliminate the security issues.

See also https://www.sonarsource.com/blog/encoding-differentials-why-charset-matters/

Also removed it from the encoding menu, both to avoid social-engineering
hazards and because it is practically unused on the web.
This commit is contained in:
Moonchild
2026-05-25 13:08:29 +02:00
committed by roytam1
parent e9c3451d54
commit c690e26c67
8 changed files with 4 additions and 183 deletions
+1
View File
@@ -63,6 +63,7 @@ EncodingUtils::IsAsciiCompatible(const nsACString& aPreferredName)
aPreferredName.LowerCaseEqualsLiteral("utf-16be") ||
aPreferredName.LowerCaseEqualsLiteral("utf-16le") ||
aPreferredName.LowerCaseEqualsLiteral("replacement") ||
aPreferredName.LowerCaseEqualsLiteral("iso-2022-jp") ||
aPreferredName.LowerCaseEqualsLiteral("hz-gb-2312") ||
aPreferredName.LowerCaseEqualsLiteral("utf-7") ||
aPreferredName.LowerCaseEqualsLiteral("x-imap4-modified-utf7"));
@@ -7,8 +7,6 @@ UNIFIED_SOURCES += [
'CharDistribution.cpp',
'JpCntx.cpp',
'nsCharSetProber.cpp',
'nsEscCharsetProber.cpp',
'nsEscSM.cpp',
'nsEUCJPProber.cpp',
'nsLatin1Prober.cpp',
'nsMBCSGroupProber.cpp',
@@ -1,46 +0,0 @@
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#include "nsEscCharsetProber.h"
#include "nsUniversalDetector.h"
// Creates a prober whose coding state machine is built from ISO2022JPSMModel.
// The prober starts in the detecting state with no charset identified.
nsEscCharSetProber::nsEscCharSetProber()
  : mCodingSM(new nsCodingStateMachine(&ISO2022JPSMModel))
  , mState(eDetecting)
  , mDetectedCharset(nullptr)
{
}
// Nothing to release by hand: the destructor body is intentionally empty.
nsEscCharSetProber::~nsEscCharSetProber() {}
// Puts the prober back into the detecting state: forgets any previously
// detected charset and resets the coding state machine.
void nsEscCharSetProber::Reset()
{
  mDetectedCharset = nullptr;
  mCodingSM->Reset();
  mState = eDetecting;
}
// Feeds up to aLen bytes from aBuf through the coding state machine.
//
// Bytes are only consumed while the prober is still in the detecting state.
// As soon as the state machine reports eItsMe, the prober switches to
// eFoundIt, records the value returned by GetCodingStateMachine() as the
// detected charset and stops consuming input.
//
// Returns the prober state after processing.
nsProbingState nsEscCharSetProber::HandleData(const char* aBuf, uint32_t aLen)
{
  // A prober that has already left the detecting state ignores further data.
  if (mState != eDetecting) {
    return mState;
  }

  const char* const bufEnd = aBuf + aLen;
  for (const char* cursor = aBuf; cursor != bufEnd; ++cursor) {
    if (mCodingSM->NextState(*cursor) != eItsMe) {
      continue;
    }

    mState = eFoundIt;
    mDetectedCharset = mCodingSM->GetCodingStateMachine();
    break;
  }

  return mState;
}
@@ -1,32 +0,0 @@
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#ifndef nsEscCharSetProber_h__
#define nsEscCharSetProber_h__
#include "nsCharSetProber.h"
#include "nsCodingStateMachine.h"
#include "nsAutoPtr.h"
// Charset prober backed by a single coding state machine. It exposes the
// charset name stored in mDetectedCharset and reports a fixed confidence.
class nsEscCharSetProber : public nsCharSetProber
{
public:
  nsEscCharSetProber();
  virtual ~nsEscCharSetProber();

  // Feeds aLen bytes starting at aBuf to the prober and returns its state.
  nsProbingState HandleData(const char* aBuf, uint32_t aLen);

  // Name of the detected charset, as last stored in mDetectedCharset.
  const char* GetCharSetName()
  {
    return mDetectedCharset;
  }

  // Current probing state.
  nsProbingState GetState()
  {
    return mState;
  }

  void Reset();

  // The confidence reported by this prober is a constant 0.99.
  float GetConfidence()
  {
    return static_cast<float>(0.99);
  }

protected:
  void GetDistribution(uint32_t aCharLen, const char* aStr);

  nsAutoPtr<nsCodingStateMachine> mCodingSM;  // owned state machine
  nsProbingState mState;                      // current probing state
  const char* mDetectedCharset;               // charset name returned by GetCharSetName()
};
#endif /* nsEscCharSetProber_h__ */
@@ -1,63 +0,0 @@
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#include "nsCodingStateMachine.h"
// Byte-class table for the ISO-2022-JP state machine: one 4-bit class value
// per byte value 0x00-0xff, eight entries per row. The trailing comment on
// each row gives the byte range it covers, in hex.
//
// Non-zero classes visible below:
//   0x1b (ESC)                               -> 1
//   0x00, 0x0e, 0x0f and every byte >= 0x80  -> 2
//   '(' -> 3, 'B' -> 4, 'J' -> 5, '@' -> 6, '$' -> 7, 'D' -> 8, 'I' -> 9
// Every other byte is class 0.
//
// NOTE(review): assumes PCK4BITS packs its arguments left to right, lowest
// byte value first -- confirm against nsCodingStateMachine.h.
static const uint32_t ISO2022JP_cls [ 256 / 8 ] = {
PCK4BITS(2,0,0,0,0,0,0,0), // 00 - 07
PCK4BITS(0,0,0,0,0,0,2,2), // 08 - 0f
PCK4BITS(0,0,0,0,0,0,0,0), // 10 - 17
PCK4BITS(0,0,0,1,0,0,0,0), // 18 - 1f
PCK4BITS(0,0,0,0,7,0,0,0), // 20 - 27
PCK4BITS(3,0,0,0,0,0,0,0), // 28 - 2f
PCK4BITS(0,0,0,0,0,0,0,0), // 30 - 37
PCK4BITS(0,0,0,0,0,0,0,0), // 38 - 3f
PCK4BITS(6,0,4,0,8,0,0,0), // 40 - 47
PCK4BITS(0,9,5,0,0,0,0,0), // 48 - 4f
PCK4BITS(0,0,0,0,0,0,0,0), // 50 - 57
PCK4BITS(0,0,0,0,0,0,0,0), // 58 - 5f
PCK4BITS(0,0,0,0,0,0,0,0), // 60 - 67
PCK4BITS(0,0,0,0,0,0,0,0), // 68 - 6f
PCK4BITS(0,0,0,0,0,0,0,0), // 70 - 77
PCK4BITS(0,0,0,0,0,0,0,0), // 78 - 7f
PCK4BITS(2,2,2,2,2,2,2,2), // 80 - 87
PCK4BITS(2,2,2,2,2,2,2,2), // 88 - 8f
PCK4BITS(2,2,2,2,2,2,2,2), // 90 - 97
PCK4BITS(2,2,2,2,2,2,2,2), // 98 - 9f
PCK4BITS(2,2,2,2,2,2,2,2), // a0 - a7
PCK4BITS(2,2,2,2,2,2,2,2), // a8 - af
PCK4BITS(2,2,2,2,2,2,2,2), // b0 - b7
PCK4BITS(2,2,2,2,2,2,2,2), // b8 - bf
PCK4BITS(2,2,2,2,2,2,2,2), // c0 - c7
PCK4BITS(2,2,2,2,2,2,2,2), // c8 - cf
PCK4BITS(2,2,2,2,2,2,2,2), // d0 - d7
PCK4BITS(2,2,2,2,2,2,2,2), // d8 - df
PCK4BITS(2,2,2,2,2,2,2,2), // e0 - e7
PCK4BITS(2,2,2,2,2,2,2,2), // e8 - ef
PCK4BITS(2,2,2,2,2,2,2,2), // f0 - f7
PCK4BITS(2,2,2,2,2,2,2,2) // f8 - ff
};
// State-transition table for the ISO-2022-JP state machine: 9 words holding
// eight entries each (72 entries in total). The trailing comment on each row
// is the hex index range of the entries in that row. Entries are either one
// of the named states (eStart, eError, eItsMe) or a numeric intermediate
// state (3, 4, 5, 6).
//
// NOTE(review): presumably indexed as (current state * 10 + byte class), with
// 10 being the value passed next to the class table in ISO2022JPSMModel --
// confirm against nsCodingStateMachine::NextState.
static const uint32_t ISO2022JP_st [ 9] = {
PCK4BITS(eStart, 3,eError,eStart,eStart,eStart,eStart,eStart),//00-07
PCK4BITS(eStart,eStart,eError,eError,eError,eError,eError,eError),//08-0f
PCK4BITS(eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe),//10-17
PCK4BITS(eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eError,eError),//18-1f
PCK4BITS(eError, 5,eError,eError,eError, 4,eError,eError),//20-27
PCK4BITS(eError,eError,eError, 6,eItsMe,eError,eItsMe,eError),//28-2f
PCK4BITS(eError,eError,eError,eError,eError,eError,eItsMe,eItsMe),//30-37
PCK4BITS(eError,eError,eError,eItsMe,eError,eError,eError,eError),//38-3f
PCK4BITS(eError,eError,eError,eError,eItsMe,eError,eStart,eStart) //40-47
};
// Character-length table referenced by the model below; all ten entries are 0.
static const uint32_t ISO2022JPCharLenTable[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
// State-machine model for ISO-2022-JP. It bundles the byte-class table, the
// state-transition table and the character-length table defined in this file,
// and carries the charset name string "ISO-2022-JP".
const SMModel ISO2022JPSMModel = {
// Packed byte-class table (4-bit entries).
{eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, ISO2022JP_cls },
// NOTE(review): presumably the number of byte classes (0-9), used as the row
// stride of the state table -- confirm against the SMModel declaration.
10,
// Packed state-transition table (4-bit entries).
{eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, ISO2022JP_st },
CHAR_LEN_TABLE(ISO2022JPCharLenTable),
"ISO-2022-JP",
};
@@ -8,7 +8,6 @@
#include "nsUniversalDetector.h"
#include "nsMBCSGroupProber.h"
#include "nsEscCharsetProber.h"
#include "nsLatin1Prober.h"
nsUniversalDetector::nsUniversalDetector()
@@ -16,7 +15,6 @@ nsUniversalDetector::nsUniversalDetector()
mDone = false;
mBestGuess = -1; //illegal value as signal
mInTag = false;
mEscCharSetProber = nullptr;
mStart = true;
mDetectedCharset = nullptr;
@@ -33,8 +31,6 @@ nsUniversalDetector::~nsUniversalDetector()
{
for (int32_t i = 0; i < NUM_OF_CHARSET_PROBERS; i++)
delete mCharSetProbers[i];
delete mEscCharSetProber;
}
void
@@ -50,9 +46,6 @@ nsUniversalDetector::Reset()
mInputState = ePureAscii;
mLastChar = '\0';
if (mEscCharSetProber)
mEscCharSetProber->Reset();
uint32_t i;
for (i = 0; i < NUM_OF_CHARSET_PROBERS; i++)
if (mCharSetProbers[i])
@@ -117,12 +110,6 @@ nsresult nsUniversalDetector::HandleData(const char* aBuf, uint32_t aLen)
//adjust state
mInputState = eHighbyte;
//kill mEscCharSetProber if it is active
if (mEscCharSetProber) {
delete mEscCharSetProber;
mEscCharSetProber = nullptr;
}
//start multibyte and singlebyte charset prober
if (nullptr == mCharSetProbers[0])
{
@@ -140,12 +127,6 @@ nsresult nsUniversalDetector::HandleData(const char* aBuf, uint32_t aLen)
}
else
{
//ok, just pure ascii so far
if ((ePureAscii == mInputState) && (aBuf[i] == '\033'))
{
//found escape character
mInputState = eEscAscii;
}
mLastChar = aBuf[i];
}
}
@@ -153,19 +134,6 @@ nsresult nsUniversalDetector::HandleData(const char* aBuf, uint32_t aLen)
nsProbingState st;
switch (mInputState)
{
case eEscAscii:
if (nullptr == mEscCharSetProber) {
mEscCharSetProber = new nsEscCharSetProber();
if (nullptr == mEscCharSetProber)
return NS_ERROR_OUT_OF_MEMORY;
}
st = mEscCharSetProber->HandleData(aBuf, aLen);
if (st == eFoundIt)
{
mDone = true;
mDetectedCharset = mEscCharSetProber->GetCharSetName();
}
break;
case eHighbyte:
for (i = 0; i < NUM_OF_CHARSET_PROBERS; i++)
{
@@ -231,8 +199,6 @@ void nsUniversalDetector::DataEnd()
Report(mCharSetProbers[maxProber]->GetCharSetName());
}
break;
case eEscAscii:
break;
default:
;
}
@@ -12,8 +12,7 @@ class nsCharSetProber;
typedef enum {
ePureAscii = 0,
eEscAscii = 1,
eHighbyte = 2
eHighbyte = 1
} nsInputState;
class nsUniversalDetector {
@@ -37,7 +36,6 @@ protected:
uint32_t mLanguageFilter;
nsCharSetProber *mCharSetProbers[NUM_OF_CHARSET_PROBERS];
nsCharSetProber *mEscCharSetProber;
};
#endif
+2 -3
View File
@@ -25,8 +25,7 @@ const kAutoDetectors = [
/**
* This set contains encodings that are in the Encoding Standard, except:
* - XSS-dangerous encodings (except ISO-2022-JP which is assumed to be
* too common not to be included).
* - XSS-dangerous encodings.
* - x-user-defined, which practically never makes sense as an end-user-chosen
* override.
* - Encodings that IE11 doesn't have in its corresponding menu.
@@ -65,7 +64,7 @@ const kEncodings = new Set([
// Japanese
"Shift_JIS",
"EUC-JP",
"ISO-2022-JP",
// "ISO-2022-JP", // Intentionally not in menu.
// Korean
"EUC-KR",
// Thai