Files
palemoon27/dom/media/encoder/OpusTrackEncoder.cpp
T
roytam1 605fde2bb1 import changes from `dev' branch of rmottola/Arctic-Fox:
- Bug 1236120: [ffmpeg] Use demuxed dimensions to determine picture size and offset. r=gerald (f336c84d0d)
- Bug 851530: Part 1 - Added support for decoding uLaw and aLaw enconded wave files. r=jya (72683b69f2)
- Bug 851530: Part 2 - Added test cases for uLaw and aLaw wave files. r=jya (ddf431bd28)
- Bug 524109 - Added support for 24 bit wav files. r=cpearce (ebfac16a20)
- Bug 864780 - Changed handling of the format chunk to skip any extension. r=cpearce (40903839a8)
- Bug 1229742: P1. Only clear EOS flag if we have new data. r=gerald (83e69375e4)
- Bug 1229742: P2. Don't reject data promise if new data is pending. r=gerald (89d8222809)
- Bug 1237809: P2. Detect change of display size. r=cpearce (ec529e2732)
- Bug 1237809: [h264] P1. Ensure correct video dimensions are passed to the decoder. r=cpearce (eb241cad10)
- Bug 1237809: P3. Ensure element dimensions are up to date. r=jwwang (541e6e74d7)
- Bug 1244639: P1. Don't assume MP3 decoding always starts at 0. r=cpearce (b62c27bd04)
- Bug 1244639: P2. Don't clamp audio time to seek time if there's no video track. r=cpearce (4720bacc24)
- Bug 1205927 - Part 1: [MediaEncoder] Support *.3g2 with EVRC audio format. r=ayang (bbd8aff9d9)
- Bug 1205927 - Part 2: Add audio-capture:3gpp2 perimission for certificated and privileged application. r=ayang (63b337e2ab)
- Bug 1198157 - Call |NotifyEndOfStream| if the encoder can't been initialized after 30 seconds. r=jwwang (2c21203d36)
- minor NL (14d757753b)
- Bug 1182426 - Add some asserts to VP8TrackEncoder for sanity. r=roc (79ff4869ca)
- Bug 1154213 - Handle timestamps of video/webm vorbis track encoding. r=mreavy, r=rjesup (a9897e4873)
- Bug 1137151: Marked destructor of |MuxerOperation| as protected, r=sotaro (94fdcf6457)
- Bug 1210232 - Let MP4Decoder handle 3GPP files on B2G. r=cpearce (95a77023ad)
- Bug 1227790 - Update GMP API to include new MediaKeyStatus types. r=jwwang (1a6933f2df)
- bits of Bug 1186375 - Add GMP EME (6b99660146)
- Bug 1221825: Fix logging arguments. r=cpearce (5261e34713)
- Bug 1244442 - Warn about Proxy.create and Proxy.createFunction. r=Waldo (718aa94f5b)
- fix spaces (720e2114f7)
- re-apply Bug 1231224 part 7 - Fix some more places to handle OOM. r=jonco (a4af46894b)
- Bug 1246122 - Don't crash in InvokeInterruptCallback if there are no JS scripts on the stack. r=shu (9702df89bc)
- Bug 1246607: Recover from OOM in AddClearDefiniteGetterSetterForPrototypeChain; r=jandem (d83c6c6c9b)
- Bug 1236546 - Don't deoptimize in ObjectGroup::defaultNewGroup when we have a null proto. r=bhackett (1c2ecc3d09)
- Bug 1249588 - Remove unnecessary type information from RegExpObject. r=jandem (9126e17d94)
- Bug 1245965 - Fix an OOM in ObjectGroup::newPlainObject; r=till (5192c25b53)
- Bug 1240527: Fix tracing of RegExpStaticsObject; r=nbp (b37f2167a5)
- Bug 1248094 - Followup to fix a typo; r=fitzgen (8afec429d9)
- Bug 1248726 - Simplify PCLocationMap even further; r=fitzgen (39f0b54a04)
- Bug 1241311 - Pre-tenure SavedFrame objects. r=terrence (b703f3d78e)
- Bug 1241249 - Add an SPS pseudo entry for JS stack capturing; r=shu (c2ae4ee5c2)
- Bug 1247299 - Force SavedFrame columns to be 0 in JS_MORE_DETERMINISTIC builds; r=sfink (09b9038448)
- Bug 1241701 - Add about:memory reporting for js::SavedStacks::pcLocationMap. r=njn (b663d911fc)
- Bug 1166234 - Throw on accessing optimized out values when using Debugger.Frame.prototype.eval. (r=jimb) (19b43b137b)
- Bug 1232655 - Fix DebugScopeProxy::has to not lookup .this on non-function scopes. r=shu (3959e98752)
- Bug 1216261 - Fix OOM handling of DebugScopes. (r=jonco) (0f8b856ee6)
- reorder after mispatch (c292050275)
- Bug 1235656 - Followup: Allow extended functions with guessed atoms in self-hosted code. (rs=arai) (a67286cd52)
- Bug 1245048: Check call to GetPrototype; r=till (35dbbdc025)
- Bug 1132630 - Renumber steps in Function.prototype.bind. r=till (9f11a5a086)
- Bug 1246131 - Provide 'dbg(msg)' debug printing utility function for self-hosted code. r=jandem (f436eeb481)
- Bug 1246131 - Part 2: Let opt builds compile again, even on a CLOSED TREE. r=bustage (8483b77541)
- Bug 1247934 - Handle receiving unboxed exports array from self hosted module code r=shu (fb9c296909)
- Bug 1246134 - Fix loading of external self-hosted JS using MOZ_SELFHOSTEDJS. r=efaust (c96059b40a)
- Bug 1220502 - ignore not visible text nodes for tree update, r=tbsaunde, roc (eed078abc6)
- Bug 1242989 - keep content insertions in a hash, r=tbsaunde (d58fc948a6)
- bug 1228400 - null check tabChild before notifying the parent process about new child documents r=davidb (09512e6287)
- Bug 1239051 - Labels should expose labeled controllers action. r=tbsaunde (c0d4d801a9)
- bug 1243077 - make xpcAccessible::GetFirstChild() work with proxies r=davidb (f539fafe93)
- bug 1243077 - make xpcAccessible::GetLastChild() work with proxied accessibles r=davidb (13716f7cc0)
- bug 1243077 - implement xpcAccessible::GetChildCount() for proxied accessibles (b8f4598834)
- bug 1243077 - make xpcAccessible::GetChildAt() work with proxied accessibles r=davidb (cce0924f7b)
- bug 1243077 - make xpcAccessible::GetChildren() work with proxied accessibles r=davidb (a9ec2b1588)
- bug 1243077 - remove an unnecessary AddRef() from xpcAccessible::GetChildren() (89a58ac2e0)
- bug 1243077 - support proxied accessibles in xpcAccessible::GetRole() r=davidb (2d2a2926c7)
- Bug 1246768 - part 1: argument conversion for Atomics.isLockFree in runtime. r=bbouvier (17f3498b84)
- Bug 1246750 - fix argument ordering to futexWakeOrRequeue + test cases. r=bbouvier (31825e7096)
- Bug 1238911 - initialize canWait with false from constructor, avoid using the variable without initialization. r=lhansen (f4657b3950)
- Bug 1235373 - Add an assert to check validity of pointers: mElement->GetPrimaryFrame() and frame. r=surkov (18023f9238)
- Bug 1241534 - Use TraceRoot for InterpreterFrame fields. r=terrence (98996dc497)
- Bug 1246112 - Fix a bogus assert in InterpreterFrame::initExecuteFrame. r=su (2093ba8a44)
- Bug 1243241 - Make RDTSC monotonic. r=jandem (1450a97a94)
- Bug 1243242 - Don't make structured cloning O(n**2) in the size of the transferables array. r=sfink (aa38dee282)
2023-12-04 22:00:13 +08:00

445 lines
15 KiB
C++

/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this file,
* You can obtain one at http://mozilla.org/MPL/2.0/. */
#include "OpusTrackEncoder.h"
#include "nsString.h"
#include "GeckoProfiler.h"
#include <opus/opus.h>
// File-local LOG macro. Any LOG definition inherited from an earlier header is
// dropped first. On Gonk (MOZ_WIDGET_GONK) builds, messages are forwarded to
// __android_log_print at ANDROID_LOG_INFO priority under the "MediaEncoder"
// tag; on every other build LOG(...) expands to nothing.
// Note: the Gonk expansion already ends with ';'.
#undef LOG
#ifdef MOZ_WIDGET_GONK
#include <android/log.h>
#define LOG(args...) __android_log_print(ANDROID_LOG_INFO, "MediaEncoder", ## args);
#else
#define LOG(args, ...)
#endif
namespace mozilla {
// The Opus format supports up to 8 channels, and supports multitrack audio up
// to 255 channels, but the current implementation supports only mono and
// stereo, and downmixes any more than that. Init() rejects inputs with more
// channels than this limit.
static const int MAX_SUPPORTED_AUDIO_CHANNELS = 8;
// http://www.opus-codec.org/docs/html_api-1.0.2/group__opus__encoder.html
// In section "opus_encoder_init", the input signal must have 1 or 2 channels.
// Init() clamps the encoder channel count to this value.
static const int MAX_CHANNELS = 2;
// Size, in bytes, of the output buffer handed to the Opus encoder for a single
// encoded packet.
static const int MAX_DATA_BYTES = 4096;
// http://tools.ietf.org/html/draft-ietf-codec-oggopus-00#section-4
// Second paragraph, " The granule position of an audio data page is in units
// of PCM audio samples at a fixed rate of 48 kHz."
// This is also the rate the input is resampled to when its own rate is not
// directly supported.
static const int kOpusSamplingRate = 48000;
// The duration of an Opus frame in milliseconds; it must be 2.5, 5, 10, 20, 40
// or 60 ms.
static const int kFrameDurationMs = 20;
// The sampling rates of the input signal (Hz) that can be encoded directly.
// Input at any other rate will be resampled to 48 kHz.
static const int kOpusSupportedInputSamplingRates[] =
{8000, 12000, 16000, 24000, 48000};
namespace {
// An endian-neutral serialization of integers. Serializing T in little endian
// format to aOutput, where T is a 16 bits or 32 bits integer.
/**
 * Appends the bytes of an integer to a byte buffer in little-endian order,
 * independently of the host's endianness.
 *
 * @param aValue  The integer to serialize (T is a 16-bit or 32-bit integer).
 * @param aOutput Buffer that receives sizeof(T) bytes, least significant
 *                byte first.
 */
template<typename T>
static void
SerializeToBuffer(T aValue, nsTArray<uint8_t>* aOutput)
{
  const uint32_t totalBits = sizeof(T) * 8;
  for (uint32_t shift = 0; shift < totalBits; shift += 8) {
    // Isolate the byte that currently sits |shift| bits above the bottom.
    const uint8_t lowByte = static_cast<uint8_t>((aValue >> shift) & 0x000000ff);
    aOutput->AppendElement(lowByte);
  }
}
/**
 * Appends a length-prefixed string to a byte buffer: first the string length
 * as a 32-bit little-endian integer, then the string's bytes (no terminator).
 *
 * @param aComment The string to serialize.
 * @param aOutput  Buffer that receives the length prefix and the characters.
 */
static inline void
SerializeToBuffer(const nsCString& aComment, nsTArray<uint8_t>* aOutput)
{
  const uint32_t byteCount = aComment.Length();
  // Length prefix goes through the integer overload (little endian).
  SerializeToBuffer(byteCount, aOutput);
  aOutput->AppendElements(aComment.get(), byteCount);
}
/**
 * Writes the Opus identification header ("OpusHead") into aOutput.
 *
 * Fields are appended in this order: magic signature, version, channel count,
 * pre-skip, input sample rate, output gain, channel mapping family.
 *
 * @param aChannelCount    Number of output channels.
 * @param aPreskip         Number of samples (at 48 kHz) the decoder should
 *                         discard when starting playback.
 * @param aInputSampleRate Sampling rate of the original input, in Hz.
 * @param aOutput          Buffer the header bytes are appended to.
 */
static void
SerializeOpusIdHeader(uint8_t aChannelCount, uint16_t aPreskip,
                      uint32_t aInputSampleRate, nsTArray<uint8_t>* aOutput)
{
  // Magic signature; sizeof - 1 drops the string literal's null terminator.
  static const uint8_t kSignature[] = "OpusHead";
  const size_t signatureLength = sizeof(kSignature) - 1;
  aOutput->AppendElements(kSignature, signatureLength);

  // Version, always 1 (8 bits, unsigned).
  aOutput->AppendElement(1);

  // Output channel count (8 bits, unsigned).
  aOutput->AppendElement(aChannelCount);

  // Pre-skip (16 bits, unsigned, little endian).
  SerializeToBuffer(aPreskip, aOutput);

  // Input sample rate (32 bits, unsigned, little endian).
  SerializeToBuffer(aInputSampleRate, aOutput);

  // Output gain (16 bits, signed, little endian); encoders should write zero.
  SerializeToBuffer(static_cast<int16_t>(0), aOutput);

  // Channel mapping family (8 bits, unsigned). Family 0 allows only 1 or 2
  // channels.
  aOutput->AppendElement(0);
}
/**
 * Writes the Opus comment header ("OpusTags") into aOutput.
 *
 * Layout: magic signature, length-prefixed vendor string, comment count
 * (32 bits, unsigned, little endian), then each comment as a length-prefixed
 * string in list order.
 *
 * @param aVendor   Vendor string.
 * @param aComments User comments to store.
 * @param aOutput   Buffer the header bytes are appended to.
 */
static void
SerializeOpusCommentHeader(const nsCString& aVendor,
                           const nsTArray<nsCString>& aComments,
                           nsTArray<uint8_t>* aOutput)
{
  // Magic signature; sizeof - 1 drops the string literal's null terminator.
  static const uint8_t kSignature[] = "OpusTags";
  const size_t signatureLength = sizeof(kSignature) - 1;
  aOutput->AppendElements(kSignature, signatureLength);

  // Vendor: 32-bit little-endian length followed by the string itself.
  SerializeToBuffer(aVendor, aOutput);

  // Comment list: the number of comments, then every comment encoded the same
  // way as the vendor string.
  const uint32_t commentCount = aComments.Length();
  SerializeToBuffer(commentCount, aOutput);
  for (uint32_t idx = 0; idx != commentCount; ++idx) {
    SerializeToBuffer(aComments[idx], aOutput);
  }
}
} // Anonymous namespace.
// Constructs an encoder with no Opus encoder and no resampler yet; both are
// created later by Init(). The lookahead starts at 0 and is refreshed by
// GetMetadata().
OpusTrackEncoder::OpusTrackEncoder()
: AudioTrackEncoder()
, mEncoder(nullptr)
, mLookahead(0)
, mResampler(nullptr)
{
}
// Releases the native codec objects this encoder still owns. The resampler
// may already be gone: GetEncodedTrack() destroys it once encoding completes.
OpusTrackEncoder::~OpusTrackEncoder()
{
  if (mResampler) {
    speex_resampler_destroy(mResampler);
    mResampler = nullptr;
  }

  if (mEncoder) {
    opus_encoder_destroy(mEncoder);
  }
}
/**
 * Configures the encoder for the given input format and creates the Opus
 * encoder, plus a Speex resampler when the input rate is not one that Opus
 * accepts directly.
 *
 * @param aChannels     Number of input channels; must be within
 *                      [1, MAX_SUPPORTED_AUDIO_CHANNELS]. Inputs with more
 *                      than MAX_CHANNELS channels are encoded with
 *                      MAX_CHANNELS channels.
 * @param aSamplingRate Input sampling rate in Hz; must be within
 *                      [8000, 192000].
 * @return NS_OK on success; NS_ERROR_FAILURE for an unsupported channel count
 *         or when the resampler or encoder cannot be created;
 *         NS_ERROR_INVALID_ARG for an out-of-range sampling rate.
 */
nsresult
OpusTrackEncoder::Init(int aChannels, int aSamplingRate)
{
  // This monitor is used to wake up other methods that are waiting for encoder
  // to be completely initialized.
  ReentrantMonitorAutoEnter mon(mReentrantMonitor);

  NS_ENSURE_TRUE((aChannels <= MAX_SUPPORTED_AUDIO_CHANNELS) && (aChannels > 0),
                 NS_ERROR_FAILURE);

  // This version of encoder API only support 1 or 2 channels,
  // So set the mChannels less or equal 2 and
  // let InterleaveTrackData downmix pcm data.
  mChannels = aChannels > MAX_CHANNELS ? MAX_CHANNELS : aChannels;

  // Reject non-audio sample rates.
  NS_ENSURE_TRUE(aSamplingRate >= 8000, NS_ERROR_INVALID_ARG);
  NS_ENSURE_TRUE(aSamplingRate <= 192000, NS_ERROR_INVALID_ARG);

  // According to www.opus-codec.org, creating an opus encoder requires the
  // sampling rate of source signal be one of 8000, 12000, 16000, 24000, or
  // 48000. If this constraint is not satisfied, we resample the input to 48kHz.
  // The table is scanned in place; no temporary array is needed for five
  // constants.
  bool rateSupported = false;
  for (size_t i = 0; i < ArrayLength(kOpusSupportedInputSamplingRates); ++i) {
    if (kOpusSupportedInputSamplingRates[i] == aSamplingRate) {
      rateSupported = true;
      break;
    }
  }

  if (!rateSupported) {
    int error = RESAMPLER_ERR_SUCCESS;
    mResampler = speex_resampler_init(mChannels,
                                      aSamplingRate,
                                      kOpusSamplingRate,
                                      SPEEX_RESAMPLER_QUALITY_DEFAULT,
                                      &error);
    if (error != RESAMPLER_ERR_SUCCESS || !mResampler) {
      return NS_ERROR_FAILURE;
    }
  }

  // The range check above already guarantees a positive rate.
  mSamplingRate = aSamplingRate;

  int error = 0;
  mEncoder = opus_encoder_create(GetOutputSampleRate(), mChannels,
                                 OPUS_APPLICATION_AUDIO, &error);

  mInitialized = (error == OPUS_OK);

  // Only configure the bitrate on an encoder that actually exists; when
  // creation fails mEncoder is null and must not be passed to
  // opus_encoder_ctl.
  if (mEncoder && mAudioBitrate) {
    opus_encoder_ctl(mEncoder, OPUS_SET_BITRATE(static_cast<int>(mAudioBitrate)));
  }

  // Wake up GetMetadata()/GetEncodedTrack(); they re-check mInitialized and
  // mCanceled before proceeding.
  mReentrantMonitor.NotifyAll();

  return error == OPUS_OK ? NS_OK : NS_ERROR_FAILURE;
}
// Returns the sampling rate (Hz) of the audio handed to the Opus encoder:
// kOpusSamplingRate when a resampler is in use, otherwise the input rate.
int
OpusTrackEncoder::GetOutputSampleRate()
{
  if (!mResampler) {
    return mSamplingRate;
  }
  return kOpusSamplingRate;
}
// Returns the number of frames (at the encoder's output sampling rate) that
// make up one Opus packet of kFrameDurationMs milliseconds.
int
OpusTrackEncoder::GetPacketDuration()
{
  const int outputRate = GetOutputSampleRate();
  return outputRate * kFrameDurationMs / 1000;
}
/**
 * Builds the Opus stream metadata: the identification header and the comment
 * header. Blocks until the encoder has been initialized or canceled.
 *
 * @return A new OpusMetadata, or nullptr if the encoder was canceled or
 *         encoding has already completed.
 */
already_AddRefed<TrackMetadataBase>
OpusTrackEncoder::GetMetadata()
{
  PROFILER_LABEL("OpusTrackEncoder", "GetMetadata",
    js::ProfileEntry::Category::OTHER);
  {
    // Block here until Init() has finished or the encoder is canceled.
    ReentrantMonitorAutoEnter mon(mReentrantMonitor);
    while (!(mCanceled || mInitialized)) {
      mReentrantMonitor.Wait();
    }
  }

  if (mCanceled || mEncodingComplete) {
    return nullptr;
  }

  RefPtr<OpusMetadata> meta = new OpusMetadata();

  // Ask the encoder for its lookahead; treat a failed query as zero.
  mLookahead = 0;
  if (opus_encoder_ctl(mEncoder, OPUS_GET_LOOKAHEAD(&mLookahead)) != OPUS_OK) {
    mLookahead = 0;
  }

  // The ogg time stamping and pre-skip is always timed at 48000, so scale the
  // lookahead from the encoder's rate up to that rate.
  const int rateRatio = kOpusSamplingRate / GetOutputSampleRate();
  SerializeOpusIdHeader(mChannels, mLookahead * rateRatio, mSamplingRate,
                        &meta->mIdHeader);

  // Comment header: the libopus version as vendor, plus one ENCODER comment.
  nsCString vendor;
  vendor.AppendASCII(opus_get_version_string());

  nsTArray<nsCString> comments;
  comments.AppendElement(NS_LITERAL_CSTRING("ENCODER=Mozilla" MOZ_APP_UA_VERSION));

  SerializeOpusCommentHeader(vendor, comments, &meta->mCommentHeader);

  return meta.forget();
}
/**
 * Encodes one Opus packet from the queued raw audio and appends it to aData.
 *
 * Blocks until the encoder is initialized and until enough source audio for
 * one packet is available (or the stream has ended, or the encoder was
 * canceled). When a resampler is in use, the input is resampled to
 * kOpusSamplingRate first; resampled frames that do not fit in the current
 * packet are kept in mResampledLeftover for the next call.
 *
 * @param aData Container that receives the encoded frame.
 * @return NS_OK when a packet was encoded; NS_ERROR_FAILURE if the encoder was
 *         canceled, encoding had already completed, or the Opus encoder
 *         reported an error.
 */
nsresult
OpusTrackEncoder::GetEncodedTrack(EncodedFrameContainer& aData)
{
  PROFILER_LABEL("OpusTrackEncoder", "GetEncodedTrack",
    js::ProfileEntry::Category::OTHER);
  {
    ReentrantMonitorAutoEnter mon(mReentrantMonitor);
    // Wait until initialized or cancelled.
    while (!mCanceled && !mInitialized) {
      mReentrantMonitor.Wait();
    }
    if (mCanceled || mEncodingComplete) {
      return NS_ERROR_FAILURE;
    }
  }

  // calculation below depends on the truth that mInitialized is true.
  MOZ_ASSERT(mInitialized);

  // re-sampled frames left last time which didn't fit into an Opus packet duration.
  const int framesLeft = mResampledLeftover.Length() / mChannels;
  // When framesLeft is 0, (GetPacketDuration() - framesLeft) is a multiple
  // of kOpusSamplingRate. There is not precision loss in the integer division
  // in computing framesToFetch. If frameLeft > 0, we need to add 1 to
  // framesToFetch to ensure there will be at least n frames after re-sampling.
  const int frameRoundUp = framesLeft ? 1 : 0;

  MOZ_ASSERT(GetPacketDuration() >= framesLeft);
  // Try to fetch m frames such that there will be n frames
  // where (n + frameLeft) >= GetPacketDuration() after re-sampling.
  const int framesToFetch = !mResampler ? GetPacketDuration()
    : (GetPacketDuration() - framesLeft) * mSamplingRate / kOpusSamplingRate
      + frameRoundUp;
  {
    // Move all the samples from mRawSegment to mSourceSegment. We only hold
    // the monitor in this block.
    ReentrantMonitorAutoEnter mon(mReentrantMonitor);

    // Wait until enough raw data, end of stream or cancelled.
    while (!mCanceled && mRawSegment.GetDuration() +
           mSourceSegment.GetDuration() < framesToFetch &&
           !mEndOfStream) {
      mReentrantMonitor.Wait();
    }

    if (mCanceled || mEncodingComplete) {
      return NS_ERROR_FAILURE;
    }

    mSourceSegment.AppendFrom(&mRawSegment);

    // Pad |mLookahead| samples to the end of source stream to prevent lost of
    // original data, the pcm duration will be calculated at rate 48K later.
    if (mEndOfStream && !mEosSetInEncoder) {
      mEosSetInEncoder = true;
      mSourceSegment.AppendNullData(mLookahead);
    }
  }

  // Start encoding data.
  // The buffer first receives up to framesToFetch source frames and later
  // holds exactly one packet for the encoder. When resampling from a rate
  // above kOpusSamplingRate, framesToFetch exceeds the packet duration, so
  // size the buffer for the larger of the two to keep every write inside
  // Length().
  AutoTArray<AudioDataValue, 9600> pcm;
  pcm.SetLength(std::max(GetPacketDuration(), framesToFetch) * mChannels);
  AudioSegment::ChunkIterator iter(mSourceSegment);
  int frameCopied = 0;

  while (!iter.IsEnded() && frameCopied < framesToFetch) {
    AudioChunk chunk = *iter;

    // Chunk to the required frame size.
    int frameToCopy = chunk.GetDuration();
    if (frameCopied + frameToCopy > framesToFetch) {
      frameToCopy = framesToFetch - frameCopied;
    }

    if (!chunk.IsNull()) {
      // Append the interleaved data to the end of pcm buffer.
      AudioTrackEncoder::InterleaveTrackData(chunk, frameToCopy, mChannels,
        pcm.Elements() + frameCopied * mChannels);
    } else {
      // Null chunks carry no samples; emit silence for their duration.
      memset(pcm.Elements() + frameCopied * mChannels, 0,
             frameToCopy * mChannels * sizeof(AudioDataValue));
    }

    frameCopied += frameToCopy;
    iter.Next();
  }

  RefPtr<EncodedFrame> audiodata = new EncodedFrame();
  audiodata->SetFrameType(EncodedFrame::OPUS_AUDIO_FRAME);
  int framesInPCM = frameCopied;
  if (mResampler) {
    AutoTArray<AudioDataValue, 9600> resamplingDest;
    // We want to consume all the input data, so we slightly oversize the
    // resampled data buffer so we can fit the output data in. We cannot really
    // predict the output frame count at each call.
    uint32_t outframes = frameCopied * kOpusSamplingRate / mSamplingRate + 1;
    uint32_t inframes = frameCopied;

    resamplingDest.SetLength(outframes * mChannels);

    // Use the same preprocessor test as the encode step below so both always
    // agree on the sample type.
#ifdef MOZ_SAMPLE_TYPE_S16
    short* in = reinterpret_cast<short*>(pcm.Elements());
    short* out = reinterpret_cast<short*>(resamplingDest.Elements());
    speex_resampler_process_interleaved_int(mResampler, in, &inframes,
                                            out, &outframes);
#else
    float* in = reinterpret_cast<float*>(pcm.Elements());
    float* out = reinterpret_cast<float*>(resamplingDest.Elements());
    speex_resampler_process_interleaved_float(mResampler, in, &inframes,
                                              out, &outframes);
#endif

    // Rebuild pcm as: leftover frames from the previous call, followed by as
    // many freshly resampled frames as fit in one packet.
    MOZ_ASSERT(pcm.Length() >= mResampledLeftover.Length());
    PodCopy(pcm.Elements(), mResampledLeftover.Elements(),
            mResampledLeftover.Length());

    uint32_t outframesToCopy = std::min(outframes,
      static_cast<uint32_t>(GetPacketDuration() - framesLeft));

    MOZ_ASSERT(pcm.Length() - mResampledLeftover.Length() >=
               outframesToCopy * mChannels);
    PodCopy(pcm.Elements() + mResampledLeftover.Length(),
            resamplingDest.Elements(), outframesToCopy * mChannels);

    // Keep whatever did not fit for the next packet.
    int frameLeftover = outframes - outframesToCopy;
    mResampledLeftover.SetLength(frameLeftover * mChannels);
    PodCopy(mResampledLeftover.Elements(),
            resamplingDest.Elements() + outframesToCopy * mChannels,
            mResampledLeftover.Length());

    // This is always at 48000Hz.
    framesInPCM = framesLeft + outframesToCopy;
    audiodata->SetDuration(framesInPCM);
  } else {
    // The ogg time stamping and pre-skip is always timed at 48000.
    audiodata->SetDuration(frameCopied * (kOpusSamplingRate / mSamplingRate));
  }

  // Remove the raw data which has been pulled to pcm buffer.
  // The value of frameCopied should equal to (or smaller than, if eos)
  // GetPacketDuration().
  mSourceSegment.RemoveLeading(frameCopied);

  // Has reached the end of input stream and all queued data has pulled for
  // encoding.
  if (mSourceSegment.GetDuration() == 0 && mEndOfStream) {
    mEncodingComplete = true;
    LOG("[Opus] Done encoding.");
  }

  MOZ_ASSERT(mEndOfStream || framesInPCM == GetPacketDuration());

  // Append null data to pcm buffer if the leftover data is not enough for
  // opus encoder.
  if (framesInPCM < GetPacketDuration() && mEndOfStream) {
    PodZero(pcm.Elements() + framesInPCM * mChannels,
            (GetPacketDuration() - framesInPCM) * mChannels);
  }

  nsTArray<uint8_t> frameData;
  // Encode the data with Opus Encoder.
  frameData.SetLength(MAX_DATA_BYTES);
  // result is returned as opus error code if it is negative.
  int result = 0;
#ifdef MOZ_SAMPLE_TYPE_S16
  const opus_int16* pcmBuf = static_cast<opus_int16*>(pcm.Elements());
  result = opus_encode(mEncoder, pcmBuf, GetPacketDuration(),
                       frameData.Elements(), MAX_DATA_BYTES);
#else
  const float* pcmBuf = static_cast<float*>(pcm.Elements());
  result = opus_encode_float(mEncoder, pcmBuf, GetPacketDuration(),
                             frameData.Elements(), MAX_DATA_BYTES);
#endif
  // Shrink to the number of bytes actually produced (none on error).
  frameData.SetLength(result >= 0 ? result : 0);

  if (result < 0) {
    LOG("[Opus] Fail to encode data! Result: %s.", opus_strerror(result));
  }

  // Once the last packet has been produced the resampler state is no longer
  // needed.
  if (mEncodingComplete) {
    if (mResampler) {
      speex_resampler_destroy(mResampler);
      mResampler = nullptr;
    }
    mResampledLeftover.SetLength(0);
  }

  audiodata->SwapInFrameData(frameData);
  aData.AppendEncodedFrame(audiodata);
  return result >= 0 ? NS_OK : NS_ERROR_FAILURE;
}
} // namespace mozilla