import changes from tenfourfox:

- closes #624: update Readability to tip (92be8e3be)
- closes #626: M1440677 (81acd2b99)
- moar adblock hosts (f8a60dfea)
- add javascript on-off menuitem in tools (191db31e6)
- #612: M253143 M1643126 (9d3a83602)
- #612: M1224669 (819a49413)
- #628: M1663642 M1663439 (bc184b4c4)
- #628: update TLDs, pins, HSTS (16c85bb33)
This commit is contained in:
2020-10-08 21:34:34 +08:00
parent 698630799d
commit 7391af2bb4
20 changed files with 2337 additions and 1753 deletions
+8 -1
View File
@@ -482,7 +482,8 @@
<menu id="tools-menu"
label="&toolsMenu.label;"
accesskey="&toolsMenu.accesskey;"
onpopupshowing="mirrorShow(this)">
onpopupshowing="mirrorShow(this);
jsToggle.updateMenu();">
<menupopup id="menu_ToolsPopup"
# We have to use setTimeout() here to avoid a flickering menu bar when opening
# the Tools menu, see bug 970769. This can be removed once we got rid of the
@@ -516,6 +517,12 @@
accesskey="&syncReAuthItem.accesskey;"
observes="sync-reauth-state"
oncommand="gSyncUI.openSignInAgainPage('menubar');"/>
<menuseparator/>
<menuitem id="toggle_javascript"
label="Enable JavaScript"
type="checkbox"
command="cmd_toggleJavascript"
checked="true"/>
<menuseparator id="devToolsSeparator"/>
<menu id="webDeveloperMenu"
label="&webDeveloperMenu.label;"
+1
View File
@@ -86,6 +86,7 @@
<command id="cmd_fullZoomEnlarge" oncommand="FullZoom.enlarge()"/>
<command id="cmd_fullZoomReset" oncommand="FullZoom.reset()"/>
<command id="cmd_fullZoomToggle" oncommand="ZoomManager.toggleZoom();"/>
<command id="cmd_toggleJavascript" oncommand="jsToggle.toggle();"/>
<command id="cmd_gestureRotateLeft" oncommand="gGestureSupport.rotate(event.sourceEvent)"/>
<command id="cmd_gestureRotateRight" oncommand="gGestureSupport.rotate(event.sourceEvent)"/>
<command id="cmd_gestureRotateEnd" oncommand="gGestureSupport.rotateEnd()"/>
+1
View File
@@ -5,6 +5,7 @@
<script type="application/javascript" src="chrome://global/content/printUtils.js"/>
<script type="application/javascript" src="chrome://global/content/viewZoomOverlay.js"/>
<script type="application/javascript" src="chrome://global/content/jsToggle.js"/>
<script type="application/javascript" src="chrome://browser/content/places/browserPlacesViews.js"/>
<script type="application/javascript" src="chrome://browser/content/browser.js"/>
<script type="application/javascript" src="chrome://browser/content/downloads/downloads.js"/>
+5
View File
@@ -1245,6 +1245,11 @@ nsScriptSecurityManager::CheckLoadURIWithPrincipal(nsIPrincipal* aPrincipal,
BLOK("w.usabilla.com") ||
BLOK("beacon.sojern.com") ||
BLOK("s3.buysellads.com") ||
BLOK("srv.buysellads.com") ||
#include "shavar-blocklist.h"
0) {
+4
View File
@@ -2712,6 +2712,8 @@ gfxFont::ShapeTextWithoutWordCache(gfxContext *aContext,
aTextRun->SetIsTab(aOffset + i);
} else if (ch == '\n') {
aTextRun->SetIsNewline(aOffset + i);
} else if (GetGeneralCategory(ch) == HB_UNICODE_GENERAL_CATEGORY_FORMAT) {
aTextRun->SetIsFormattingControl(aOffset + i);
} else if (IsInvalidControlChar(ch) &&
!(aTextRun->GetFlags() & gfxTextRunFactory::TEXT_HIDE_CONTROL_CHARACTERS)) {
if (GetFontEntry()->IsUserFont() && HasCharacter(ch)) {
@@ -2926,6 +2928,8 @@ gfxFont::SplitAndInitTextRun(gfxContext *aContext,
aTextRun->SetIsTab(aRunStart + i);
} else if (ch == '\n') {
aTextRun->SetIsNewline(aRunStart + i);
} else if (GetGeneralCategory(ch) == HB_UNICODE_GENERAL_CATEGORY_FORMAT) {
aTextRun->SetIsFormattingControl(aRunStart + i);
} else if (IsInvalidControlChar(ch) &&
!(aTextRun->GetFlags() & gfxTextRunFactory::TEXT_HIDE_CONTROL_CHARACTERS)) {
if (GetFontEntry()->IsUserFont() && HasCharacter(ch)) {
+14 -1
View File
@@ -747,7 +747,12 @@ public:
// which is not combined with any combining characters. This flag is
// set for all those characters except 0x20 whitespace.
FLAG_CHAR_NO_EMPHASIS_MARK = 0x20,
CHAR_TYPE_FLAGS_MASK = 0x38,
// Per CSS Text, letter-spacing is not applied to formatting chars
// (category Cf). We mark those in the textrun so as to be able to
// skip them when setting up spacing in nsTextFrame.
FLAG_CHAR_IS_FORMATTING_CONTROL = 0x40,
CHAR_TYPE_FLAGS_MASK = 0x78,
GLYPH_COUNT_MASK = 0x00FFFF00U,
GLYPH_COUNT_SHIFT = 8
@@ -802,6 +807,10 @@ public:
return !CharIsSpace() &&
(IsSimpleGlyph() || !(mValue & FLAG_CHAR_NO_EMPHASIS_MARK));
}
// Whether this character was flagged as a formatting control (see
// FLAG_CHAR_IS_FORMATTING_CONTROL). A simple glyph never reports the flag.
bool CharIsFormattingControl() const {
    if (IsSimpleGlyph()) {
        return false;
    }
    return (mValue & FLAG_CHAR_IS_FORMATTING_CONTROL) != 0;
}
uint32_t CharTypeFlags() const {
return IsSimpleGlyph() ? 0 : (mValue & CHAR_TYPE_FLAGS_MASK);
@@ -880,6 +889,10 @@ public:
NS_ASSERTION(!IsSimpleGlyph(), "Expected non-simple-glyph");
mValue |= FLAG_CHAR_NO_EMPHASIS_MARK;
}
// Flag this character as a formatting control. Only valid on a glyph record
// that is not in the simple-glyph form (asserted below).
void SetIsFormattingControl() {
    NS_ASSERTION(!IsSimpleGlyph(), "Expected non-simple-glyph");
    mValue = mValue | FLAG_CHAR_IS_FORMATTING_CONTROL;
}
private:
uint32_t mValue;
+5 -1
View File
@@ -1947,8 +1947,12 @@ gfxFontGroup::IsInvalidChar(char16_t ch)
if (ch <= 0x9f) {
return true;
}
// Word-separating format/bidi control characters are not shaped as part
// of words.
return (((ch & 0xFF00) == 0x2000 /* Unicode control character */ &&
(ch == 0x200B/*ZWSP*/ || ch == 0x2028/*LSEP*/ || ch == 0x2029/*PSEP*/)) ||
(ch == 0x200B/*ZWSP*/ || ch == 0x2028/*LSEP*/ ||
ch == 0x2029/*PSEP*/ || ch == 0x2060/*WJ*/)) ||
ch == 0xfeff/*ZWNBSP*/ ||
IsBidiControl(ch));
}
+7
View File
@@ -133,6 +133,10 @@ public:
NS_ASSERTION(aPos < GetLength(), "aPos out of range");
return mCharacterGlyphs[aPos].CharMayHaveEmphasisMark();
}
// Reports whether the character at aPos carries the formatting-control flag.
// aPos must be less than GetLength() (asserted).
bool CharIsFormattingControl(uint32_t aPos) const {
    MOZ_ASSERT(aPos < GetLength());
    const auto& glyph = mCharacterGlyphs[aPos];
    return glyph.CharIsFormattingControl();
}
// All uint32_t aStart, uint32_t aLength ranges below are restricted to
// grapheme cluster boundaries! All offsets are in terms of the string
@@ -532,6 +536,9 @@ public:
void SetNoEmphasisMark(uint32_t aIndex) {
EnsureComplexGlyph(aIndex).SetNoEmphasisMark();
}
// Flags the character at aIndex as a formatting control, going through
// EnsureComplexGlyph() first, as the sibling setters in this class do.
void SetIsFormattingControl(uint32_t aIndex) {
    auto&& glyph = EnsureComplexGlyph(aIndex);
    glyph.SetIsFormattingControl();
}
/**
* Prefetch all the glyph extents needed to ensure that Measure calls
+24 -9
View File
@@ -3215,12 +3215,15 @@ PropertyProvider::GetSpacing(uint32_t aStart, uint32_t aLength,
}
static bool
CanAddSpacingAfter(gfxTextRun* aTextRun, uint32_t aOffset)
CanAddSpacingAfter(gfxTextRun* aTextRun, uint32_t aOffset,
bool aNewlineIsSignificant)
{
if (aOffset + 1 >= aTextRun->GetLength())
return true;
return aTextRun->IsClusterStart(aOffset + 1) &&
aTextRun->IsLigatureGroupStart(aOffset + 1);
aTextRun->IsLigatureGroupStart(aOffset + 1) &&
!aTextRun->CharIsFormattingControl(aOffset) &&
!(aNewlineIsSignificant && aTextRun->CharIsNewline(aOffset));
}
void
@@ -3244,11 +3247,13 @@ PropertyProvider::GetSpacingInternal(uint32_t aStart, uint32_t aLength,
// Iterate over non-skipped characters
nsSkipCharsRunIterator
run(start, nsSkipCharsRunIterator::LENGTH_UNSKIPPED_ONLY, aLength);
bool newlineIsSignificant = mTextStyle->NewlineIsSignificant(mFrame);
while (run.NextRun()) {
uint32_t runOffsetInSubstring = run.GetSkippedOffset() - aStart;
gfxSkipCharsIterator iter = run.GetPos();
for (int32_t i = 0; i < run.GetRunLength(); ++i) {
if (CanAddSpacingAfter(mTextRun, run.GetSkippedOffset() + i)) {
if (CanAddSpacingAfter(mTextRun, run.GetSkippedOffset() + i,
newlineIsSignificant)) {
// End of a cluster, not in a ligature: put letter-spacing after it
aSpacing[runOffsetInSubstring + i].mAfter += mLetterSpacing;
}
@@ -7930,7 +7935,9 @@ nsTextFrame::AddInlineMinISizeForFlow(nsRenderingContext *aRenderingContext,
if (i > wordStart) {
nscoord width =
NSToCoordCeilClamped(textRun->GetAdvanceWidth(wordStart, i - wordStart, &provider));
NSToCoordCeilClamped(textRun->GetAdvanceWidth(wordStart, i - wordStart,
&provider));
width = std::max(0, width);
aData->currentLine = NSCoordSaturatingAdd(aData->currentLine, width);
aData->atStartOfLine = false;
@@ -7942,8 +7949,11 @@ nsTextFrame::AddInlineMinISizeForFlow(nsRenderingContext *aRenderingContext,
aData->trailingWhitespace += width;
} else {
// Some non-whitespace so the old trailingWhitespace is no longer trailing
aData->trailingWhitespace =
NSToCoordCeilClamped(textRun->GetAdvanceWidth(trimStart, i - trimStart, &provider));
nscoord wsWidth =
NSToCoordCeilClamped(textRun->GetAdvanceWidth(trimStart,
i - trimStart,
&provider));
aData->trailingWhitespace = std::max(0, wsWidth);
}
} else {
aData->trailingWhitespace = 0;
@@ -8082,7 +8092,9 @@ nsTextFrame::AddInlinePrefISizeForFlow(nsRenderingContext *aRenderingContext,
if (i > lineStart) {
nscoord width =
NSToCoordCeilClamped(textRun->GetAdvanceWidth(lineStart, i - lineStart, &provider));
NSToCoordCeilClamped(textRun->GetAdvanceWidth(lineStart, i - lineStart,
&provider));
width = std::max(0, width);
aData->currentLine = NSCoordSaturatingAdd(aData->currentLine, width);
if (collapseWhitespace) {
@@ -8093,8 +8105,11 @@ nsTextFrame::AddInlinePrefISizeForFlow(nsRenderingContext *aRenderingContext,
aData->trailingWhitespace += width;
} else {
// Some non-whitespace so the old trailingWhitespace is no longer trailing
aData->trailingWhitespace =
NSToCoordCeilClamped(textRun->GetAdvanceWidth(trimStart, i - trimStart, &provider));
nscoord wsWidth =
NSToCoordCeilClamped(textRun->GetAdvanceWidth(trimStart,
i - trimStart,
&provider));
aData->trailingWhitespace = std::max(0, wsWidth);
}
} else {
aData->trailingWhitespace = 0;
+20 -14
View File
@@ -101,6 +101,14 @@ struct nsTableReflowState {
availSize.BSize(wm) = std::max(0, availSize.BSize(wm));
}
}
// Shrink the remaining available block-size by aAmount, never letting it drop
// below zero. An unconstrained available block-size is left untouched.
void ReduceAvailableBSizeBy(WritingMode aWM, nscoord aAmount) {
    if (availSize.BSize(aWM) != NS_UNCONSTRAINEDSIZE) {
        availSize.BSize(aWM) = std::max(0, availSize.BSize(aWM) - aAmount);
    }
}
};
/********************************************************************************
@@ -2809,9 +2817,7 @@ nsTableFrame::PlaceChild(nsTableReflowState& aReflowState,
aReflowState.bCoord += aKidDesiredSize.BSize(wm);
// If our bsize is constrained, then update the available bsize
if (NS_UNCONSTRAINEDSIZE != aReflowState.availSize.BSize(wm)) {
aReflowState.availSize.BSize(wm) -= aKidDesiredSize.BSize(wm);
}
aReflowState.ReduceAvailableBSizeBy(wm, aKidDesiredSize.BSize(wm));
}
void
@@ -3055,17 +3061,24 @@ nsTableFrame::ReflowChildren(nsTableReflowState& aReflowState,
// using the footer's prev-in-flow's height instead of reflowing it again,
// but there's no real need.
if (isPaginated) {
bool reorder = false;
if (thead && !GetPrevInFlow()) {
reorder = thead->GetNextInFlow();
nscoord desiredHeight;
nsresult rv = SetupHeaderFooterChild(aReflowState, thead, &desiredHeight);
if (NS_FAILED(rv))
return;
}
if (tfoot) {
reorder = reorder || tfoot->GetNextInFlow();
nsresult rv = SetupHeaderFooterChild(aReflowState, tfoot, &footerHeight);
if (NS_FAILED(rv))
return;
}
if (reorder) {
// Reorder row groups - the reflow may have changed the nextinflows.
OrderRowGroups(rowGroups, &thead, &tfoot);
}
}
// if the child is a tbody in paginated mode reduce the height by a repeated footer
bool allowRepeatedFooter = false;
@@ -3131,14 +3144,10 @@ nsTableFrame::ReflowChildren(nsTableReflowState& aReflowState,
kidReflowState.mFlags.mIsTopOfPage = false;
}
aReflowState.bCoord += cellSpacingB;
if (NS_UNCONSTRAINEDSIZE != aReflowState.availSize.BSize(wm)) {
aReflowState.availSize.BSize(wm) -= cellSpacingB;
}
aReflowState.ReduceAvailableBSizeBy(wm, cellSpacingB);
// record the presence of a next in flow, it might get destroyed so we
// need to reorder the row group array
bool reorder = false;
if (kidFrame->GetNextInFlow())
reorder = true;
const bool reorder = kidFrame->GetNextInFlow();
LogicalPoint kidPosition(wm, aReflowState.iCoord, aReflowState.bCoord);
ReflowChild(kidFrame, presContext, desiredSize, kidReflowState,
@@ -3146,7 +3155,7 @@ nsTableFrame::ReflowChildren(nsTableReflowState& aReflowState,
kidReflowState.ApplyRelativePositioning(&kidPosition, containerSize);
if (reorder) {
// reorder row groups the reflow may have changed the nextinflows
// Reorder row groups - the reflow may have changed the nextinflows.
OrderRowGroups(rowGroups, &thead, &tfoot);
childX = rowGroups.IndexOf(kidFrame);
if (childX == RowGroupArray::NoIndex) {
@@ -3294,10 +3303,7 @@ nsTableFrame::ReflowChildren(nsTableReflowState& aReflowState,
}
aReflowState.bCoord += kidRect.BSize(wm);
// If our bsize is constrained then update the available bsize.
if (NS_UNCONSTRAINEDSIZE != aReflowState.availSize.BSize(wm)) {
aReflowState.availSize.BSize(wm) -= cellSpacingB + kidRect.BSize(wm);
}
aReflowState.ReduceAvailableBSizeBy(wm, cellSpacingB + kidRect.BSize(wm));
}
}
+3
View File
@@ -1316,6 +1316,9 @@ nsStandardURL::SetSpec(const nsACString &input)
// parse the given URL...
nsresult rv = ParseURL(spec, specLength);
if (mScheme.mLen <= 0) {
rv = NS_ERROR_MALFORMED_URI;
}
if (NS_SUCCEEDED(rv)) {
// finally, use the URLSegment member variables to build a normalized
// copy of |spec|
+23 -15
View File
@@ -907,16 +907,18 @@ org.do
sld.do
web.do
// dz : https://en.wikipedia.org/wiki/.dz
// dz : http://www.nic.dz/images/pdf_nic/charte.pdf
dz
art.dz
asso.dz
com.dz
edu.dz
gov.dz
org.dz
net.dz
gov.dz
edu.dz
asso.dz
pol.dz
art.dz
soc.dz
tm.dz
// ec : http://www.nic.ec/reg/paso1.asp
// Submitted by registry <vabboud@nic.ec>
@@ -4697,13 +4699,12 @@ web.ni
// ccTLD for the Netherlands
nl
// no : http://www.norid.no/regelverk/index.en.html
// The Norwegian registry has declined to notify us of updates. The web pages
// referenced below are the official source of the data. There is also an
// announce mailing list:
// https://postlister.uninett.no/sympa/info/norid-diskusjon
// no : https://www.norid.no/en/om-domenenavn/regelverk-for-no/
// Norid geographical second level domains : https://www.norid.no/en/om-domenenavn/regelverk-for-no/vedlegg-b/
// Norid category second level domains : https://www.norid.no/en/om-domenenavn/regelverk-for-no/vedlegg-c/
// Norid category second-level domains managed by parties other than Norid : https://www.norid.no/en/om-domenenavn/regelverk-for-no/vedlegg-d/
no
// Norid generic domains : http://www.norid.no/regelverk/vedlegg-c.en.html
// Norid category second level domains : https://www.norid.no/en/om-domenenavn/regelverk-for-no/vedlegg-c/
fhs.no
vgs.no
fylkesbibl.no
@@ -4711,13 +4712,13 @@ folkebibl.no
museum.no
idrett.no
priv.no
// Non-Norid generic domains : http://www.norid.no/regelverk/vedlegg-d.en.html
// Norid category second-level domains managed by parties other than Norid : https://www.norid.no/en/om-domenenavn/regelverk-for-no/vedlegg-d/
mil.no
stat.no
dep.no
kommune.no
herad.no
// no geographical names : http://www.norid.no/regelverk/vedlegg-b.en.html
// Norid geographical second level domains : https://www.norid.no/en/om-domenenavn/regelverk-for-no/vedlegg-b/
// counties
aa.no
ah.no
@@ -7109,7 +7110,7 @@ org.zw
// newGTLDs
// List of new gTLDs imported from https://www.icann.org/resources/registries/gtlds/v2/gtlds.json on 2020-09-09T17:29:56Z
// List of new gTLDs imported from https://www.icann.org/resources/registries/gtlds/v2/gtlds.json on 2020-09-17T17:33:58Z
// This list is auto-generated, don't edit it manually.
// aaa : 2015-02-26 American Automobile Association, Inc.
aaa
@@ -9184,7 +9185,7 @@ nokia
// northwesternmutual : 2015-06-18 Northwestern Mutual Registry, LLC
northwesternmutual
// norton : 2014-12-04 Symantec Corporation
// norton : 2014-12-04 NortonLifeLock Inc.
norton
// now : 2015-06-25 Amazon Registry Services, Inc.
@@ -11232,6 +11233,10 @@ drud.us
// Submitted by Richard Harper <richard@duckdns.org>
duckdns.org
// Bip : https://bip.sh
// Submitted by Joel Kennedy <joel@bip.sh>
bip.sh
// bitbridge.net : Submitted by Craig Welch, abeliidev@gmail.com
bitbridge.net
@@ -12002,6 +12007,7 @@ googleapis.com
googlecode.com
pagespeedmobilizer.com
publishproxy.com
translate.goog
withgoogle.com
withyoutube.com
@@ -13374,7 +13380,9 @@ cx.ua
// WP Engine : https://wpengine.com/
// Submitted by Michael Smith <michael.smith@wpengine.com>
// Submitted by Brandon DuRette <brandon.durette@wpengine.com>
wpenginepowered.com
js.wpenginepowered.com
// Impertrix Solutions : <https://impertrixcdn.com>
// Submitted by Zhixiang Zhao <csuite@impertrix.com>
+6 -3
View File
@@ -482,10 +482,13 @@ nsMIMEHeaderParamImpl::DoParameterInternal(const char *aHeaderValue,
if (*str != '"') {
// The value is a token, not a quoted string.
valueStart = str;
for (valueEnd = str;
*valueEnd && !nsCRT::IsAsciiSpace (*valueEnd) && *valueEnd != ';';
valueEnd++)
for (valueEnd = str; *valueEnd && *valueEnd != ';'; valueEnd++) {
;
}
// ignore trailing whitespace:
while (valueEnd > valueStart && nsCRT::IsAsciiSpace(*(valueEnd - 1))) {
valueEnd--;
}
str = valueEnd;
} else {
isQuotedString = true;
+11 -1
View File
@@ -151,6 +151,14 @@ static const char kGOOGLE_PIN_GeoTrustGlobal2Fingerprint[] =
static const char kGOOGLE_PIN_GoDaddySecureFingerprint[] =
"MrZLZnJ6IGPkBm87lYywqu5Xal7O/ZUzmbuIdHMdlYc=";
/* GOOGLE_PIN_R3LetsEncrypt */
static const char kGOOGLE_PIN_R3LetsEncryptFingerprint[] =
"jQJTbIh0grw0/1TkHSumWb+Fs0Ggogr621gT3PvPKG0=";
/* GOOGLE_PIN_R4LetsEncrypt */
static const char kGOOGLE_PIN_R4LetsEncryptFingerprint[] =
"5VReIRNHJBiRxVSgOTTN6bdJZkpZ0m1hX+WPd5kPLQM=";
/* GOOGLE_PIN_RapidSSL */
static const char kGOOGLE_PIN_RapidSSLFingerprint[] =
"lT09gPUeQfbYrlxRtpsHrjDblj9Rpz+u7ajfCrg4qDM=";
@@ -481,10 +489,12 @@ static const StaticFingerprints kPinset_google = {
};
static const char* const kPinset_tor_Data[] = {
kGOOGLE_PIN_R4LetsEncryptFingerprint,
kTor3Fingerprint,
kDigiCert_High_Assurance_EV_Root_CAFingerprint,
kLet_s_Encrypt_Authority_X3Fingerprint,
kTor1Fingerprint,
kGOOGLE_PIN_R3LetsEncryptFingerprint,
kGOOGLE_PIN_RapidSSLFingerprint,
kLet_s_Encrypt_Authority_X4Fingerprint,
kTor2Fingerprint,
@@ -1149,4 +1159,4 @@ static const TransportSecurityPreload kPublicKeyPinningPreloadList[] = {
static const int32_t kUnknownId = -1;
static const PRTime kPreloadPKPinsExpirationTime = INT64_C(1608571180297000);
static const PRTime kPreloadPKPinsExpirationTime = INT64_C(1610028953290000);
File diff suppressed because it is too large Load Diff
+10 -3
View File
@@ -278,7 +278,7 @@
var whitespace = [" ", "\t", "\n", "\r"];
// See http://www.w3schools.com/dom/dom_nodetype.asp
// See https://developer.mozilla.org/en-US/docs/Web/API/Node/nodeType
var nodeTypes = {
ELEMENT_NODE: 1,
ATTRIBUTE_NODE: 2,
@@ -705,7 +705,6 @@
}
// Using Array.join() avoids the overhead from lazy string concatenation.
// See http://blog.cdleary.com/2012/01/string-representation-in-spidermonkey/#ropes
var arr = [];
getHTML(this);
return arr.join("");
@@ -875,7 +874,11 @@
JSDOMParser.prototype = {
error: function(m) {
dump("JSDOMParser error: " + m + "\n");
if (typeof dump !== "undefined") {
dump("JSDOMParser error: " + m + "\n");
} else if (typeof console !== "undefined") {
console.log("JSDOMParser error: " + m + "\n");
}
this.errorState += m + "\n";
},
@@ -1187,3 +1190,7 @@
global.JSDOMParser = JSDOMParser;
})(this);
if (typeof module === "object") {
module.exports = this.JSDOMParser;
}
@@ -1,5 +1,4 @@
/* eslint-env es6:false */
/* globals exports */
/*
* Copyright (c) 2010 Arc90 Inc
*
@@ -95,6 +94,6 @@ function isProbablyReaderable(doc, isVisible) {
});
}
if (typeof exports === "object") {
exports.isProbablyReaderable = isProbablyReaderable;
if (typeof module === "object") {
module.exports = isProbablyReaderable;
}
+363 -31
View File
@@ -50,6 +50,10 @@ function Readability(doc, options) {
this._charThreshold = options.charThreshold || this.DEFAULT_CHAR_THRESHOLD;
this._classesToPreserve = this.CLASSES_TO_PRESERVE.concat(options.classesToPreserve || []);
this._keepClasses = !!options.keepClasses;
this._serializer = options.serializer || function(el) {
return el.innerHTML;
};
this._disableJSONLD = !!options.disableJSONLD;
// Start with all flags set
this._flags = this.FLAG_STRIP_UNLIKELYS |
@@ -131,8 +135,14 @@ Readability.prototype = {
prevLink: /(prev|earl|old|new|<|«)/i,
whitespace: /^\s*$/,
hasContent: /\S$/,
srcsetUrl: /(\S+)(\s+[\d.]+[xw])?(\s*(?:,|$))/g,
b64DataUrl: /^data:\s*([^\s;,]+)\s*;\s*base64\s*,/i,
// See: https://schema.org/Article
// The alternatives are grouped so that ^ and $ anchor every type name; without
// the group, ^ applied only to "Article" and $ only to "APIReference".
jsonLdArticleTypes: /^(?:Article|AdvertiserContentArticle|NewsArticle|AnalysisNewsArticle|AskPublicNewsArticle|BackgroundNewsArticle|OpinionNewsArticle|ReportageNewsArticle|ReviewNewsArticle|Report|SatiricalArticle|ScholarlyArticle|MedicalScholarlyArticle|SocialMediaPosting|BlogPosting|LiveBlogPosting|DiscussionForumPosting|TechArticle|APIReference)$/
},
UNLIKELY_ROLES: [ "menu", "menubar", "complementary", "navigation", "alert", "alertdialog", "dialog" ],
DIV_TO_P_ELEMS: [ "A", "BLOCKQUOTE", "DL", "DIV", "IMG", "OL", "P", "PRE", "TABLE", "UL", "SELECT" ],
ALTER_TO_DIV_EXCEPTIONS: ["DIV", "ARTICLE", "SECTION", "P"],
@@ -155,6 +165,15 @@ Readability.prototype = {
// These are the classes that readability sets itself.
CLASSES_TO_PRESERVE: [ "page" ],
// Maps the names of the HTML entities we unescape to their literal characters.
HTML_ESCAPE_MAP: {
"lt": "<",
"gt": ">",
"amp": "&",
"quot": '"',
"apos": "'",
},
/**
* Run any post-process modifications to article content as necessary.
*
@@ -165,6 +184,8 @@ Readability.prototype = {
// Readability cannot open relative uris so we convert them to absolute uris.
this._fixRelativeUris(articleContent);
this._simplifyNestedElements(articleContent);
if (!this._keepClasses) {
// Remove classes.
this._cleanClasses(articleContent);
@@ -230,6 +251,21 @@ Readability.prototype = {
Array.prototype.forEach.call(nodeList, fn, this);
},
/**
* Iterate over a NodeList, and return the first node that passes
* the supplied test function
*
* For convenience, the current object context is applied to the provided
* test function.
*
* @param NodeList nodeList The NodeList.
* @param Function fn The test function.
* @return Node|undefined The first node for which fn returns a truthy value,
*         or undefined when no node passes the test.
*/
_findNode: function(nodeList, fn) {
// Array.prototype.find is borrowed so that array-like NodeLists work too;
// passing `this` as the last argument makes it the callback's `this`.
return Array.prototype.find.call(nodeList, fn, this);
},
/**
* Iterate over a NodeList, return true if any of the provided iterate
* function calls returns true, false otherwise.
@@ -328,6 +364,7 @@ Readability.prototype = {
if (baseURI == documentURI && uri.charAt(0) == "#") {
return uri;
}
// Otherwise, resolve against base URI:
try {
return new URL(uri, baseURI).href;
@@ -362,15 +399,56 @@ Readability.prototype = {
}
});
var imgs = this._getAllNodesWithTag(articleContent, ["img"]);
this._forEachNode(imgs, function(img) {
var src = img.getAttribute("src");
var medias = this._getAllNodesWithTag(articleContent, [
"img", "picture", "figure", "video", "audio", "source"
]);
this._forEachNode(medias, function(media) {
var src = media.getAttribute("src");
var poster = media.getAttribute("poster");
var srcset = media.getAttribute("srcset");
if (src) {
img.setAttribute("src", toAbsoluteURI(src));
media.setAttribute("src", toAbsoluteURI(src));
}
if (poster) {
media.setAttribute("poster", toAbsoluteURI(poster));
}
if (srcset) {
var newSrcset = srcset.replace(this.REGEXPS.srcsetUrl, function(_, p1, p2, p3) {
return toAbsoluteURI(p1) + (p2 || "") + p3;
});
media.setAttribute("srcset", newSrcset);
}
});
},
/**
* Collapse redundant DIV/SECTION wrappers inside the article content.
*
* Walks the tree starting at articleContent. A DIV or SECTION that has a
* parent and whose id does not start with "readability" is removed when
* _isElementWithoutContent() reports it as empty; when it wraps exactly one
* DIV or SECTION (per _hasSingleTagInsideElement()), its attributes are copied
* onto that child and the child takes its place in the tree.
*
* @param Element articleContent Root of the subtree to simplify.
* @return void
**/
_simplifyNestedElements: function(articleContent) {
var node = articleContent;
while (node) {
// Nodes without a parent and nodes whose id starts with "readability" are never touched.
if (node.parentNode && ["DIV", "SECTION"].includes(node.tagName) && !(node.id && node.id.startsWith("readability"))) {
if (this._isElementWithoutContent(node)) {
// Empty wrapper: drop it and continue from whatever node follows it.
node = this._removeAndGetNext(node);
continue;
} else if (this._hasSingleTagInsideElement(node, "DIV") || this._hasSingleTagInsideElement(node, "SECTION")) {
var child = node.children[0];
// Copy the wrapper's attributes onto the child; setAttribute overwrites
// any attribute of the same name the child already had.
for (var i = 0; i < node.attributes.length; i++) {
child.setAttribute(node.attributes[i].name, node.attributes[i].value);
}
node.parentNode.replaceChild(child, node);
// Re-examine the promoted child on the next iteration, since it may
// itself be a collapsible wrapper.
node = child;
continue;
}
}
node = this._getNextNode(node);
}
},
/**
* Get the article title as an H1.
*
@@ -840,6 +918,12 @@ Readability.prototype = {
node = this._removeAndGetNext(node);
continue;
}
if (this.UNLIKELY_ROLES.includes(node.getAttribute("role"))) {
this.log("Removing content with role " + node.getAttribute("role") + " - " + matchString);
node = this._removeAndGetNext(node);
continue;
}
}
// Remove DIV, SECTION, and HEADER nodes without any content(e.g. text, image, video, or iframe).
@@ -913,7 +997,7 @@ Readability.prototype = {
return;
// Exclude nodes with no ancestor.
var ancestors = this._getNodeAncestors(elementToScore, 3);
var ancestors = this._getNodeAncestors(elementToScore, 5);
if (ancestors.length === 0)
return;
@@ -1233,12 +1317,111 @@ Readability.prototype = {
return false;
},
/**
* Converts some of the common HTML entities in string to their corresponding characters.
*
* @param str {string} - a string to unescape.
* @return string without HTML entity.
*/
_unescapeHtmlEntities: function(str) {
if (!str) {
return str;
}
var htmlEscapeMap = this.HTML_ESCAPE_MAP;
return str.replace(/&(quot|amp|apos|lt|gt);/g, function(_, tag) {
return htmlEscapeMap[tag];
}).replace(/&#(?:x([0-9a-z]{1,4})|([0-9]{1,4}));/gi, function(_, hex, numStr) {
var num = parseInt(hex || numStr, hex ? 16 : 10);
return String.fromCharCode(num);
});
},
/**
* Try to extract metadata from JSON-LD object.
* For now, only Schema.org objects of type Article or its subtypes are supported.
* @return Object with any metadata that could be extracted (possibly none)
*/
_getJSONLD: function (doc) {
var scripts = this._getAllNodesWithTag(doc, ["script"]);
var jsonLdElement = this._findNode(scripts, function(el) {
return el.getAttribute("type") === "application/ld+json";
});
if (jsonLdElement) {
try {
// Strip CDATA markers if present
var content = jsonLdElement.textContent.replace(/^\s*<!\[CDATA\[|\]\]>\s*$/g, "");
var parsed = JSON.parse(content);
var metadata = {};
if (
!parsed["@context"] ||
!parsed["@context"].match(/^https?\:\/\/schema\.org$/)
) {
return metadata;
}
if (!parsed["@type"] && Array.isArray(parsed["@graph"])) {
parsed = parsed["@graph"].find(function(it) {
return (it["@type"] || "").match(
this.REGEXPS.jsonLdArticleTypes
);
});
}
if (
!parsed ||
!parsed["@type"] ||
!parsed["@type"].match(this.REGEXPS.jsonLdArticleTypes)
) {
return metadata;
}
if (typeof parsed.name === "string") {
metadata.title = parsed.name.trim();
} else if (typeof parsed.headline === "string") {
metadata.title = parsed.headline.trim();
}
if (parsed.author) {
if (typeof parsed.author.name === "string") {
metadata.byline = parsed.author.name.trim();
} else if (Array.isArray(parsed.author) && parsed.author[0] && typeof parsed.author[0].name === "string") {
metadata.byline = parsed.author
.filter(function(author) {
return author && typeof author.name === "string";
})
.map(function(author) {
return author.name.trim();
})
.join(", ");
}
}
if (typeof parsed.description === "string") {
metadata.excerpt = parsed.description.trim();
}
if (
parsed.publisher &&
typeof parsed.publisher.name === "string"
) {
metadata.siteName = parsed.publisher.name.trim();
}
return metadata;
} catch (err) {
this.log(err.message);
}
}
return {};
},
/**
* Attempts to get excerpt and byline metadata for the article.
*
* @param {Object} jsonld object containing any metadata that
* could be extracted from JSON-LD object.
*
* @return Object with optional "excerpt" and "byline" properties
*/
_getArticleMetadata: function() {
_getArticleMetadata: function(jsonld) {
var metadata = {};
var values = {};
var metaElements = this._doc.getElementsByTagName("meta");
@@ -1284,7 +1467,8 @@ Readability.prototype = {
});
// get title
metadata.title = values["dc:title"] ||
metadata.title = jsonld.title ||
values["dc:title"] ||
values["dcterm:title"] ||
values["og:title"] ||
values["weibo:article:title"] ||
@@ -1297,12 +1481,14 @@ Readability.prototype = {
}
// get author
metadata.byline = values["dc:creator"] ||
metadata.byline = jsonld.byline ||
values["dc:creator"] ||
values["dcterm:creator"] ||
values["author"];
// get description
metadata.excerpt = values["dc:description"] ||
metadata.excerpt = jsonld.excerpt ||
values["dc:description"] ||
values["dcterm:description"] ||
values["og:description"] ||
values["weibo:article:description"] ||
@@ -1311,11 +1497,114 @@ Readability.prototype = {
values["twitter:description"];
// get site name
metadata.siteName = values["og:site_name"];
metadata.siteName = jsonld.siteName ||
values["og:site_name"];
// in many sites the meta value is escaped with HTML entities,
// so here we need to unescape it
metadata.title = this._unescapeHtmlEntities(metadata.title);
metadata.byline = this._unescapeHtmlEntities(metadata.byline);
metadata.excerpt = this._unescapeHtmlEntities(metadata.excerpt);
metadata.siteName = this._unescapeHtmlEntities(metadata.siteName);
return metadata;
},
/**
* Check if node is image, or if node contains exactly only one image
* whether as a direct child or as its descendants.
*
* @param Element
**/
_isSingleImage: function(node) {
if (node.tagName === "IMG") {
return true;
}
if (node.children.length !== 1 || node.textContent.trim() !== "") {
return false;
}
return this._isSingleImage(node.children[0]);
},
/**
* Find all <noscript> elements that directly follow an image (or an element
* wrapping a single image) and which themselves contain only one <img>.
* Replace the preceding image element with the image from inside the
* <noscript>. This improves the quality of the images we use on some sites
* (e.g. Medium). The <noscript> element itself is not removed here.
*
* @param Document doc The document to process; getElementsByTagName() and
*        createElement() are called on it.
**/
_unwrapNoscriptImages: function(doc) {
// First remove every <img> that has neither a source attribute nor any attribute
// value that looks like an image file name. This prevents such a placeholder <img>
// from being replaced by the <noscript> image in the next step.
var imgs = Array.from(doc.getElementsByTagName("img"));
this._forEachNode(imgs, function(img) {
for (var i = 0; i < img.attributes.length; i++) {
var attr = img.attributes[i];
switch (attr.name) {
case "src":
case "srcset":
case "data-src":
case "data-srcset":
// Has a (possibly lazy-loaded) source attribute: keep the image.
return;
}
if (/\.(jpg|jpeg|png|webp)/i.test(attr.value)) {
// Some other attribute holds what looks like an image path: keep the image.
return;
}
}
img.parentNode.removeChild(img);
});
// Next, find each <noscript> and try to extract its image.
var noscripts = Array.from(doc.getElementsByTagName("noscript"));
this._forEachNode(noscripts, function(noscript) {
// Parse the content of the <noscript> and make sure it contains only an image.
var tmp = doc.createElement("div");
tmp.innerHTML = noscript.innerHTML;
if (!this._isSingleImage(tmp)) {
return;
}
// If the <noscript> has a previous sibling element that contains only an image,
// replace that sibling with the <noscript> content. The old image's
// attributes that might hold an image are carried over to the new one.
var prevElement = noscript.previousElementSibling;
if (prevElement && this._isSingleImage(prevElement)) {
var prevImg = prevElement;
if (prevImg.tagName !== "IMG") {
prevImg = prevElement.getElementsByTagName("img")[0];
}
var newImg = tmp.getElementsByTagName("img")[0];
for (var i = 0; i < prevImg.attributes.length; i++) {
var attr = prevImg.attributes[i];
if (attr.value === "") {
continue;
}
if (attr.name === "src" || attr.name === "srcset" || /\.(jpg|jpeg|png|webp)/i.test(attr.value)) {
if (newImg.getAttribute(attr.name) === attr.value) {
continue;
}
// Do not clobber an attribute the new image already has: store the old
// value under a "data-old-" prefixed name instead.
var attrName = attr.name;
if (newImg.hasAttribute(attrName)) {
attrName = "data-old-" + attrName;
}
newImg.setAttribute(attrName, attr.value);
}
}
noscript.parentNode.replaceChild(tmp.firstElementChild, prevElement);
}
});
},
/**
* Removes script tags from the document.
*
@@ -1644,30 +1933,67 @@ Readability.prototype = {
/* convert images and figures that have properties like data-src into images that can be loaded without JS */
_fixLazyImages: function (root) {
this._forEachNode(this._getAllNodesWithTag(root, ["img", "picture", "figure"]), function (elem) {
// also check for "null" to work around https://github.com/jsdom/jsdom/issues/2580
if ((!elem.src && (!elem.srcset || elem.srcset == "null")) || elem.className.toLowerCase().indexOf("lazy") !== -1) {
// In some sites (e.g. Kotaku), they put 1px square image as base64 data uri in the src attribute.
// So, here we check if the data uri is too short, just might as well remove it.
if (elem.src && this.REGEXPS.b64DataUrl.test(elem.src)) {
// Make sure it's not SVG, because SVG can have a meaningful image in under 133 bytes.
var parts = this.REGEXPS.b64DataUrl.exec(elem.src);
if (parts[1] === "image/svg+xml") {
return;
}
// Make sure this element has other attributes which contains image.
// If it doesn't, then this src is important and shouldn't be removed.
var srcCouldBeRemoved = false;
for (var i = 0; i < elem.attributes.length; i++) {
var attr = elem.attributes[i];
if (attr.name === "src" || attr.name === "srcset") {
if (attr.name === "src") {
continue;
}
var copyTo = null;
if (/\.(jpg|jpeg|png|webp)\s+\d/.test(attr.value)) {
copyTo = "srcset";
} else if (/^\s*\S+\.(jpg|jpeg|png|webp)\S*\s*$/.test(attr.value)) {
copyTo = "src";
if (/\.(jpg|jpeg|png|webp)/i.test(attr.value)) {
srcCouldBeRemoved = true;
break;
}
if (copyTo) {
//if this is an img or picture, set the attribute directly
if (elem.tagName === "IMG" || elem.tagName === "PICTURE") {
elem.setAttribute(copyTo, attr.value);
} else if (elem.tagName === "FIGURE" && !this._getAllNodesWithTag(elem, ["img", "picture"]).length) {
//if the item is a <figure> that does not contain an image or picture, create one and place it inside the figure
//see the nytimes-3 testcase for an example
var img = this._doc.createElement("img");
img.setAttribute(copyTo, attr.value);
elem.appendChild(img);
}
}
// Here we assume that an image smaller than 100 bytes (or 133 bytes once encoded as base64)
// is too small to be real content, so it is probably a placeholder image.
if (srcCouldBeRemoved) {
var b64starts = elem.src.search(/base64\s*/i) + 7;
var b64length = elem.src.length - b64starts;
if (b64length < 133) {
elem.removeAttribute("src");
}
}
}
// also check for "null" to work around https://github.com/jsdom/jsdom/issues/2580
if ((elem.src || (elem.srcset && elem.srcset != "null")) && elem.className.toLowerCase().indexOf("lazy") === -1) {
return;
}
for (var j = 0; j < elem.attributes.length; j++) {
attr = elem.attributes[j];
if (attr.name === "src" || attr.name === "srcset") {
continue;
}
var copyTo = null;
if (/\.(jpg|jpeg|png|webp)\s+\d/.test(attr.value)) {
copyTo = "srcset";
} else if (/^\s*\S+\.(jpg|jpeg|png|webp)\S*\s*$/.test(attr.value)) {
copyTo = "src";
}
if (copyTo) {
//if this is an img or picture, set the attribute directly
if (elem.tagName === "IMG" || elem.tagName === "PICTURE") {
elem.setAttribute(copyTo, attr.value);
} else if (elem.tagName === "FIGURE" && !this._getAllNodesWithTag(elem, ["img", "picture"]).length) {
//if the item is a <figure> that does not contain an image or picture, create one and place it inside the figure
//see the nytimes-3 testcase for an example
var img = this._doc.createElement("img");
img.setAttribute(copyTo, attr.value);
elem.appendChild(img);
}
}
}
@@ -1828,12 +2154,18 @@ Readability.prototype = {
}
}
// Unwrap image from noscript
this._unwrapNoscriptImages(this._doc);
// Extract JSON-LD metadata before removing scripts
var jsonLd = this._disableJSONLD ? {} : this._getJSONLD(this._doc);
// Remove script tags from the document.
this._removeScripts(this._doc);
this._prepDocument();
var metadata = this._getArticleMetadata();
var metadata = this._getArticleMetadata(jsonLd);
this._articleTitle = metadata.title;
var articleContent = this._grabArticle();
@@ -1859,7 +2191,7 @@ Readability.prototype = {
title: this._articleTitle,
byline: metadata.byline || this._articleByline,
dir: this._articleDir,
content: articleContent.innerHTML,
content: this._serializer(articleContent),
textContent: textContent,
length: textContent.length,
excerpt: metadata.excerpt,
+1
View File
@@ -61,6 +61,7 @@ toolkit.jar:
content/global/select-child.js
content/global/TopLevelVideoDocument.js
content/global/treeUtils.js
content/global/jsToggle.js
content/global/viewZoomOverlay.js
*+ content/global/bindings/autocomplete.xml (widgets/autocomplete.xml)
content/global/bindings/browser.xml (widgets/browser.xml)
+36
View File
@@ -0,0 +1,36 @@
// -*- indent-tabs-mode: nil; js-indent-level: 2 -*-
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
/**
 * Helper behind the "toggle_javascript" menu item and the
 * cmd_toggleJavascript command: reads and flips the boolean
 * "javascript.enabled" preference and mirrors it onto the menu item.
 */
var jsToggle = {
  /**
   * Preference service, looked up on first access. The getter deletes itself
   * and stores the service as a plain property, so the lookup runs only once.
   */
  get _prefBranch() {
    delete this._prefBranch;
    return this._prefBranch = Components.classes["@mozilla.org/preferences-service;1"]
                                        .getService(Components.interfaces.nsIPrefBranch);
  },

  /** Current value of the "javascript.enabled" preference. */
  get executeJs() {
    return this._prefBranch.getBoolPref("javascript.enabled");
  },

  /**
   * Writes aVal to the "javascript.enabled" preference.
   * (A value returned from a setter is ignored by assignment expressions,
   * so nothing is returned here.)
   */
  set executeJs(aVal) {
    this._prefBranch.setBoolPref("javascript.enabled", aVal);
  },

  /**
   * Syncs the "checked" attribute of the "toggle_javascript" menu item with
   * the current preference value. Does nothing when the item is not present
   * in the current document, so the popupshowing handler never throws.
   */
  updateMenu: function jsToggle_updateMenu() {
    var menuItem = document.getElementById("toggle_javascript");
    if (!menuItem) {
      return;
    }
    // Attribute values are strings; spell out the two values explicitly
    // instead of relying on boolean-to-string coercion.
    menuItem.setAttribute("checked", this.executeJs ? "true" : "false");
  },

  /** Flips the "javascript.enabled" preference. */
  toggle: function jsToggle_toggle() {
    this.executeJs = !this.executeJs;
  }
};