Files
palemoon27/toolkit/components/terminator/nsTerminator.cpp
T
roytam1 56aad8a83e import change from rmottola/Arctic-Fox:
- Bug 1149987 - Part 2: Make ErrorResult unassignable; r=bzbarsky (32661559b)
- Bug 1149987 - Part 3: Give ErrorResult a move constructor and a move assignment operator; (27f4c6125)
- Bug 1149987 - Part 4: Do not attempt to delete ErrorResult::mMessage when deserializing the object from IPDL; r=bzbarsky (0f9dcc603)
- Bug 1110485 P0 Add an ErrorResult constructor that takes nsresult. (72a779666)
- Bug 1110485 P1 Refactor Cache IPC requests to use a separate actor. (a7e4c1959)
- Bug 1127914 - Part 1 - Duplicate keyed histograms for double submission. (78673277f)
- Bug 1127914 - Part 2 - Duplicate normal histograms for double submission. (55c302057)
- Bug 1127914 - Part 3 - Submit duplicate histogram data for 'non-classic' telemetry sessions. r=vladan (bb3e49c43)
- Bug 1120362 - Part 1 - Enable snapshotting and clearing subsession histograms. (14378a6e5)
- Bug 1120362 - Part 2 - Enable snapshotting and clearing keyed subsession histograms. r=vladan (c0e0bfb3e)
- partial apply of Bug 1119281 - Fix missing telemetry client id (ae0dc0194)
- Bug 1122047 - Part 1 - Sketch out Telemetry environment module. (0419391b0)
- Bug 1122047 - Part 2 - Make TelemetryPing shutdown properly on delayed initialization (0102cef09)
- Bug 1122061 - Give TelemetryPing a common API for sending pings. (999cb825d)
- Bug 1122061 - Move TelemetrySession tests out of test_telemetryPing.js. (2d5b61de1)
- Bug 1120362 - Part 3 - Reset subsession histograms on telemetry payload collections. r=vladan (0d3f04df1)
- Bug 1120362 - Part 4 - Start new telemetry subsessions on local midnight. r=vladan (93eb9ca21)
- Bug 1120363 - Break up Telemetry sessions on environment changes. (a7c8d70c7)
- Bug 1122052 - Remove duplicated data from TelemetrySession. (bb905d602)
- Bug 1122050 - Remove persona and experiment data from TelemetrySession. (40ca59a9e)
- Bug 1134268 - Part 1 - Fix and order Telemetry shutdown for TelemetryPing and TelemetrySession. r=yoric (30d0f0656)
- Bug 1134268 - Part 2 - Fixup TelemetryEnvironment shutdown if the module wasnt initialized. r=vladan (ec2875fea)
- Bug 1135076 - Missing histograms in childPayloads. r=vladan (9f317cf9d)
- Bug 1134279 - Make TelemetryPing and TelemetrySession code use the "FHR enabled" & "Telemetry enabled" prefs properly. r=vladan (4050d7f24)
- Bug 1128768: Part 1 - Modify IPC to allow retrieval of topmost routing id on the stack; (cd2e8a2f0)
- Bug 1129249 - Add a "restyle" feature to profiler and split the style label in Cleopatra based on the restyleSource, r=dholbert,mstange (b37df94d1)
- Bug 1150684: Remove XPCOM.h from IOInterposer.h (5b7e1cef3)
- Bug 1093934 - Create a XPCOM library that can be used to support standalone WebRTC. (9ec8a819f)
- Merge branch 'master' of https://github.com/rmottola/Arctic-Fox (d0f05eea4)
- Bug 1128768: Part 2,3,4 - Refactor hang annotation code; (f5086aba9) (with xpcom/threads/ fixes for my tele-removed tree)
- Bug 1128768: Part 5 - Update plugin code to retrieve SWF file for hang annotations; (774a47aec)
- Bug 1110485 P2 Remove 'P' prefix from non-protocol IPC types in Cache API. r=baku (ea29a10cf)
- Bug 1110485 P3 Move Fetch IPC PHeaderEntry type to Cache. Rename HeadesEntry. (9eba0aca0)
- Bug 1110485 P4 Keep Cache Actors alive during async operations. (eb75f2316)
- Bug 1110485 P5 Replace useless DBSchema class type with namespace. (159b902db)
- Bug 1110485 P6 Remove useless cache::FileUtils type (1bdf00fc3)
- Bug 1110485 P7 Rename DeleteCache() to DeleteCacheId() better distinguish it from CacheDelete(). (5199f9d6f)
- Bug 1110485 P8 Correctly set the Feature on the stream control child actor. (c8673cb13)
- Bug 1150691 Fix Cache API race with storage invalidation. (2723dff50)
- Bug 1151892 Refactor Cache Manager Context usage to be more sane and fix shutdown assert. r=ehsan (ea96381cf)
- Bug 1136331 - OdinMonkey: allow stdlib calls in heap expressions (2fc5e2bfd)
- Bug 1141439 - Exit with an error code instead of falling through the REMOTE_NOT_FOUND code path when the X-remote returns an explicit command line handler error. (afcf9b1aa)
- Bug 1135825: Add missing MOZ_OVERRIDE annotation in RTCIdentityProviderRegistrar.h (e8beec4e8)
- (Bug 1135138 is not merged due to broken build)
2019-07-25 21:33:11 +08:00

530 lines
15 KiB
C++

/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
* vim: sw=2 ts=2 et lcs=trail\:.,tab\:>~ :
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
/**
* A watchdog designed to terminate shutdown if it lasts too long.
*
* This watchdog is designed as a worst-case problem container for the
* common case in which Firefox just won't shutdown.
*
* We spawn a thread during quit-application. If any of the shutdown
* steps takes more than n milliseconds (63000 by default), kill the
* process as fast as possible, without any cleanup.
*/
#include "nsTerminator.h"
#include "prthread.h"
#include "prmon.h"
#include "plstr.h"
#include "prio.h"
#include "nsString.h"
#include "nsServiceManagerUtils.h"
#include "nsDirectoryServiceUtils.h"
#include "nsAppDirectoryServiceDefs.h"
#include "nsIObserverService.h"
#include "nsIPrefService.h"
#include "mozilla/ArrayUtils.h"
#include "mozilla/Attributes.h"
#include "mozilla/DebugOnly.h"
#include "mozilla/MemoryChecking.h"
#include "mozilla/Preferences.h"
#include "mozilla/Services.h"
#include "mozilla/UniquePtr.h"
#include "mozilla/unused.h"
#include "mozilla/Telemetry.h"
// Normally, the number of milliseconds that AsyncShutdown waits until
// it decides to crash is specified as a preference. We use the
// following value as a fallback if for some reason the preference is
// absent.
#define FALLBACK_ASYNCSHUTDOWN_CRASH_AFTER_MS 60000
// Additional number of milliseconds to wait until we decide to exit
// forcefully.
#define ADDITIONAL_WAIT_BEFORE_CRASH_MS 3000
// One second, in ticks.
#define TICK_DURATION 1000
namespace mozilla {
namespace {
// Utility function: create a thread that is non-joinable,
// does not prevent the process from terminating, is never
// cooperatively scheduled, and uses a default stack size.
PRThread* CreateSystemThread(void (*start)(void* arg),
void* arg)
{
PRThread* thread = PR_CreateThread(
PR_SYSTEM_THREAD, /* This thread will not prevent the process from terminating */
start,
arg,
PR_PRIORITY_LOW,
PR_GLOBAL_THREAD /* Make sure that the thread is never cooperatively scheduled */,
PR_UNJOINABLE_THREAD,
0 /* Use default stack size */
);
MOZ_LSAN_INTENTIONALLY_LEAK_OBJECT(thread); // This pointer will never be deallocated.
return thread;
}
////////////////////////////////////////////
//
// The watchdog
//
// This nspr thread is in charge of crashing the process if any stage of shutdown
// lasts more than some predefined duration. As a side-effect, it measures the
// duration of each stage of shutdown.
//
// The heartbeat of the operation.
//
// Main thread:
//
// * Whenever a shutdown step has been completed, the main thread
// swaps gHeartbeat to 0 to mark that the shutdown process is still
// progressing. The value swapped away indicates the number of ticks
// it took for the shutdown step to advance.
//
// Watchdog thread:
//
// * Every tick, the watchdog thread increments gHearbeat atomically.
//
// A note about precision:
// Since gHeartbeat is generally reset to 0 between two ticks, this means
// that gHeartbeat stays at 0 less than one tick. Consequently, values
// extracted from gHeartbeat must be considered rounded up.
Atomic<uint32_t> gHeartbeat(0);
struct Options {
/**
* How many ticks before we should crash the process.
*/
uint32_t crashAfterTicks;
};
/**
* Entry point for the watchdog thread
*/
void
RunWatchdog(void* arg)
{
PR_SetCurrentThreadName("Shutdown Hang Terminator");
// Let's copy and deallocate options, that's one less leak to worry
// about.
UniquePtr<Options> options((Options*)arg);
uint32_t crashAfterTicks = options->crashAfterTicks;
options = nullptr;
const uint32_t timeToLive = crashAfterTicks;
while (true) {
//
// We do not want to sleep for the entire duration,
// as putting the computer to sleep would suddenly
// cause us to timeout on wakeup.
//
// Rather, we prefer sleeping for at most 1 second
// at a time. If the computer sleeps then wakes up,
// we have lost at most one second, which is much
// more reasonable.
//
PR_Sleep(TICK_DURATION);
if (gHeartbeat++ < timeToLive) {
continue;
}
// Shutdown is apparently dead. Crash the process.
MOZ_CRASH("Shutdown too long, probably frozen, causing a crash.");
}
}
////////////////////////////////////////////
//
// Writer thread
//
// This nspr thread is in charge of writing to disk statistics produced by the
// watchdog thread and collected by the main thread. Note that we use a nspr
// thread rather than usual XPCOM I/O simply because we outlive XPCOM and its
// threads.
//
// Utility class, used by UniquePtr<> to close nspr files.
class PR_CloseDelete
{
public:
MOZ_CONSTEXPR PR_CloseDelete() {}
PR_CloseDelete(const PR_CloseDelete& aOther)
{}
void operator()(PRFileDesc* aPtr) const
{
PR_Close(aPtr);
}
};
//
// Communication between the main thread and the writer thread.
//
// Main thread:
//
// * Whenever a shutdown step has been completed, the main thread
// obtains the number of ticks from the watchdog threads, builds
// a string representing all the data gathered so far, places
// this string in `gWriteData`, and wakes up the writer thread
// using `gWriteReady`. If `gWriteData` already contained a non-null
// pointer, this means that the writer thread is lagging behind the
// main thread, and the main thread cleans up the memory.
//
// Writer thread:
//
// * When awake, the writer thread swaps `gWriteData` to nullptr. If
// `gWriteData` contained data to write, the . If so, the writer
// thread writes the data to a file named "ShutdownDuration.json.tmp",
// then moves that file to "ShutdownDuration.json" and cleans up the
// data. If `gWriteData` contains a nullptr, the writer goes to sleep
// until it is awkened using `gWriteReady`.
//
//
// The data written by the writer thread will be read by another
// module upon the next restart and fed to Telemetry.
//
Atomic<nsCString*> gWriteData(nullptr);
PRMonitor* gWriteReady = nullptr;
void RunWriter(void* arg)
{
PR_SetCurrentThreadName("Shutdown Statistics Writer");
MOZ_LSAN_INTENTIONALLY_LEAK_OBJECT(arg);
// Shutdown will generally complete before we have a chance to
// deallocate. This is not a leak.
// Setup destinationPath and tmpFilePath
nsCString destinationPath(static_cast<char*>(arg));
nsAutoCString tmpFilePath;
tmpFilePath.Append(destinationPath);
tmpFilePath.AppendLiteral(".tmp");
// Cleanup any file leftover from a previous run
unused << PR_Delete(tmpFilePath.get());
unused << PR_Delete(destinationPath.get());
while (true) {
//
// Check whether we have received data from the main thread.
//
// We perform the check before waiting on `gWriteReady` as we may
// have received data while we were busy writing.
//
// Also note that gWriteData may have been modified several times
// since we last checked. That's ok, we are not losing any important
// data (since we keep adding data), and we are not leaking memory
// (since the main thread deallocates any data that hasn't been
// consumed by the writer thread).
//
UniquePtr<nsCString> data(gWriteData.exchange(nullptr));
if (!data) {
// Data is not available yet.
// Wait until the main thread provides it.
PR_EnterMonitor(gWriteReady);
PR_Wait(gWriteReady, PR_INTERVAL_NO_TIMEOUT);
PR_ExitMonitor(gWriteReady);
continue;
}
MOZ_LSAN_INTENTIONALLY_LEAK_OBJECT(data.get());
// Shutdown may complete before we have a chance to deallocate.
// This is not a leak.
//
// Write to a temporary file
//
// In case of any error, we simply give up. Since the data is
// hardly critical, we don't want to spend too much effort
// salvaging it.
//
UniquePtr<PRFileDesc, PR_CloseDelete>
tmpFileDesc(PR_Open(tmpFilePath.get(),
PR_WRONLY | PR_TRUNCATE | PR_CREATE_FILE,
00600));
// Shutdown may complete before we have a chance to close the file.
// This is not a leak.
MOZ_LSAN_INTENTIONALLY_LEAK_OBJECT(tmpFileDesc.get());
if (tmpFileDesc == nullptr) {
break;
}
if (PR_Write(tmpFileDesc.get(), data->get(), data->Length()) == -1) {
break;
}
tmpFileDesc.reset();
//
// Rename on top of destination file.
//
// This is not sufficient to guarantee that the destination file
// will be written correctly, but, again, we don't care enough
// about the data to make more efforts.
//
if (PR_Rename(tmpFilePath.get(), destinationPath.get()) != PR_SUCCESS) {
break;
}
}
}
/**
* A step during shutdown.
*
* Shutdown is divided in steps, which all map to an observer
* notification. The duration of a step is defined as the number of
* ticks between the time we receive a notification and the next one.
*/
struct ShutdownStep
{
char const* const mTopic;
int mTicks;
MOZ_CONSTEXPR explicit ShutdownStep(const char *const topic)
: mTopic(topic)
, mTicks(-1)
{}
};
static ShutdownStep sShutdownSteps[] = {
ShutdownStep("quit-application"),
ShutdownStep("profile-change-teardown"),
ShutdownStep("profile-before-change"),
ShutdownStep("xpcom-will-shutdown"),
ShutdownStep("xpcom-shutdown"),
};
} // anonymous namespace
NS_IMPL_ISUPPORTS(nsTerminator, nsIObserver)
nsTerminator::nsTerminator()
: mInitialized(false)
, mCurrentStep(-1)
{
}
// During startup, register as an observer for all interesting topics.
nsresult
nsTerminator::SelfInit()
{
nsCOMPtr<nsIObserverService> os = mozilla::services::GetObserverService();
if (!os) {
return NS_ERROR_UNEXPECTED;
}
for (size_t i = 0; i < ArrayLength(sShutdownSteps); ++i) {
DebugOnly<nsresult> rv = os->AddObserver(this, sShutdownSteps[i].mTopic, false);
#if defined(DEBUG)
NS_WARN_IF(NS_FAILED(rv));
#endif // defined(DEBUG)
}
return NS_OK;
}
// Actually launch these threads. This takes place at the first sign of shutdown.
void
nsTerminator::Start()
{
MOZ_ASSERT(!mInitialized);
StartWatchdog();
StartWriter();
mInitialized = true;
}
// Prepare, allocate and start the watchdog thread.
// By design, it will never finish, nor be deallocated.
void
nsTerminator::StartWatchdog()
{
int32_t crashAfterMS =
Preferences::GetInt("toolkit.asyncshutdown.crash_timeout",
FALLBACK_ASYNCSHUTDOWN_CRASH_AFTER_MS);
// Ignore negative values
if (crashAfterMS <= 0) {
crashAfterMS = FALLBACK_ASYNCSHUTDOWN_CRASH_AFTER_MS;
}
// Add a little padding, to ensure that we do not crash before
// AsyncShutdown.
if (crashAfterMS > INT32_MAX - ADDITIONAL_WAIT_BEFORE_CRASH_MS) {
// Defend against overflow
crashAfterMS = INT32_MAX;
} else {
crashAfterMS += ADDITIONAL_WAIT_BEFORE_CRASH_MS;
}
UniquePtr<Options> options(new Options());
options->crashAfterTicks = crashAfterMS / TICK_DURATION;
DebugOnly<PRThread*> watchdogThread = CreateSystemThread(RunWatchdog,
options.release());
MOZ_ASSERT(watchdogThread);
}
// Prepare, allocate and start the writer thread. By design, it will never
// finish, nor be deallocated. In case of error, we degrade
// gracefully to not writing Telemetry data.
void
nsTerminator::StartWriter()
{
if (!Telemetry::CanRecordExtended()) {
return;
}
nsCOMPtr<nsIFile> profLD;
nsresult rv = NS_GetSpecialDirectory(NS_APP_USER_PROFILE_LOCAL_50_DIR,
getter_AddRefs(profLD));
if (NS_FAILED(rv)) {
return;
}
rv = profLD->Append(NS_LITERAL_STRING("ShutdownDuration.json"));
if (NS_FAILED(rv)) {
return;
}
nsAutoString path;
rv = profLD->GetPath(path);
if (NS_FAILED(rv)) {
return;
}
gWriteReady = PR_NewMonitor();
MOZ_LSAN_INTENTIONALLY_LEAK_OBJECT(gWriteReady); // We will never deallocate this object
PRThread* writerThread = CreateSystemThread(RunWriter,
ToNewUTF8String(path));
if (!writerThread) {
return;
}
}
NS_IMETHODIMP
nsTerminator::Observe(nsISupports *, const char *aTopic, const char16_t *)
{
if (strcmp(aTopic, "profile-after-change") == 0) {
return SelfInit();
}
// Other notifications are shutdown-related.
// As we have seen examples in the wild of shutdown notifications
// not being sent (or not being sent in the expected order), we do
// not assume a specific order.
if (!mInitialized) {
Start();
}
UpdateHeartbeat(aTopic);
UpdateTelemetry();
UpdateCrashReport(aTopic);
// Perform a little cleanup
nsCOMPtr<nsIObserverService> os = mozilla::services::GetObserverService();
MOZ_RELEASE_ASSERT(os);
(void)os->RemoveObserver(this, aTopic);
return NS_OK;
}
void
nsTerminator::UpdateHeartbeat(const char* aTopic)
{
// Reset the clock, find out how long the current phase has lasted.
uint32_t ticks = gHeartbeat.exchange(0);
if (mCurrentStep > 0) {
sShutdownSteps[mCurrentStep].mTicks = ticks;
}
// Find out where we now are in the current shutdown.
// Don't assume that shutdown takes place in the expected order.
int nextStep = -1;
for (size_t i = 0; i < ArrayLength(sShutdownSteps); ++i) {
if (strcmp(sShutdownSteps[i].mTopic, aTopic) == 0) {
nextStep = i;
break;
}
}
MOZ_ASSERT(nextStep != -1);
mCurrentStep = nextStep;
}
void
nsTerminator::UpdateTelemetry()
{
if (!Telemetry::CanRecordExtended() || !gWriteReady) {
return;
}
//
// We need Telemetry data on the effective duration of each step,
// to be able to tune the time-to-crash of each of both the
// Terminator and AsyncShutdown. However, at this stage, it is too
// late to record such data into Telemetry, so we write it to disk
// and read it upon the next startup.
//
// Build JSON.
UniquePtr<nsCString> telemetryData(new nsCString());
telemetryData->AppendLiteral("{");
size_t fields = 0;
for (size_t i = 0; i < ArrayLength(sShutdownSteps); ++i) {
if (sShutdownSteps[i].mTicks < 0) {
// Ignore this field.
continue;
}
if (fields++ > 0) {
telemetryData->Append(", ");
}
telemetryData->AppendLiteral("\"");
telemetryData->Append(sShutdownSteps[i].mTopic);
telemetryData->AppendLiteral("\": ");
telemetryData->AppendInt(sShutdownSteps[i].mTicks);
}
telemetryData->AppendLiteral("}");
if (fields == 0) {
// Nothing to write
return;
}
//
// Send data to the worker thread.
//
delete gWriteData.exchange(telemetryData.release()); // Clear any data that hasn't been written yet
// In case the worker thread was sleeping, wake it up.
PR_EnterMonitor(gWriteReady);
PR_Notify(gWriteReady);
PR_ExitMonitor(gWriteReady);
}
void
nsTerminator::UpdateCrashReport(const char* aTopic)
{
}
} // namespace mozilla