Parse raw data to identify single frames before decoding

A VP8 or VP9 packet may contain alternate frames. They need to be fed separately to the FFmpeg decoder.
2026-05-26 14:18:48 +00:00 · 2016-12-07 05:16:12 -06:00
parent 7b9bcb2286
commit eb5ceada68
4 changed files with 64 additions and 4 deletions
@@ -44,12 +44,11 @@ protected:
  AVCodecContext* mCodecContext;
  AVFrame*        mFrame;
  nsRefPtr<MediaByteBuffer> mExtraData;
+  AVCodecID mCodecID;

 private:
  static bool sFFmpegInitDone;
  static StaticMutex sMonitor;
-
-  AVCodecID mCodecID;
 };

 } // namespace mozilla
@@ -17,6 +17,9 @@ AV_FUNC(avcodec_get_edge_width, 0)
 AV_FUNC(avcodec_open2, 0)
 AV_FUNC(av_init_packet, 0)
 AV_FUNC(av_dict_get, 0)
+AV_FUNC(av_parser_init, 0)
+AV_FUNC(av_parser_close, 0)
+AV_FUNC(av_parser_parse2, 0)
 AV_FUNC(avcodec_version, 0)
 AV_FUNC(avcodec_register_all, 0)

@@ -71,6 +71,7 @@ FFmpegH264Decoder<LIBAV_VER>::FFmpegH264Decoder(
  , mPictureHeight(aConfig.mImage.height)
  , mDisplayWidth(aConfig.mDisplay.width)
  , mDisplayHeight(aConfig.mDisplay.height)
+  , mCodecParser(nullptr)
 {
  MOZ_COUNT_CTOR(FFmpegH264Decoder);
  // Use a new MediaByteBuffer as the object will be modified during initialization.
@@ -94,12 +95,63 @@ FFmpegH264Decoder<LIBAV_VER>::Init()

 FFmpegH264Decoder<LIBAV_VER>::DecodeResult
 FFmpegH264Decoder<LIBAV_VER>::DoDecodeFrame(MediaRawData* aSample)
+{
+  uint8_t* inputData = const_cast<uint8_t*>(aSample->Data()); // parser and packet below take a non-const pointer
+  size_t inputSize = aSample->Size();
+
+  if (inputSize && (mCodecID == AV_CODEC_ID_VP8 // non-empty VP8/VP9 samples are split by a parser before decoding
+#if LIBAVCODEC_VERSION_MAJOR >= 55
+      || mCodecID == AV_CODEC_ID_VP9
+#endif
+      )) {
+    if (!mCodecParser) { // parser is created lazily, on the first sample that needs it
+      mCodecParser = av_parser_init(mCodecID);
+      if (!mCodecParser) {
+        mCallback->Error();
+        return DecodeResult::DECODE_ERROR;
+      }
+      mCodecParser->flags |= PARSER_FLAG_COMPLETE_FRAMES; // NOTE(review): presumably marks input as frame-aligned - confirm in FFmpeg docs
+    }
+    bool gotFrame = false; // set once any parsed chunk decodes to a frame
+    while (inputSize) { // loop until the parser has consumed every byte of the sample
+      uint8_t* data;
+      int size;
+      int len = av_parser_parse2(mCodecParser, mCodecContext, &data, &size,
+                                 inputData, inputSize,
+                                 aSample->mTime, aSample->mTimecode,
+                                 aSample->mOffset);
+      if (size_t(len) > inputSize) { // also catches a negative len, which wraps to a huge value when cast
+        mCallback->Error();
+        return DecodeResult::DECODE_ERROR;
+      }
+      inputData += len; // advance past the bytes the parser reported as consumed
+      inputSize -= len;
+      if (size) { // parser produced output: hand that span to the per-packet overload
+        switch (DoDecodeFrame(aSample, data, size)) {
+          case DecodeResult::DECODE_ERROR:
+            return DecodeResult::DECODE_ERROR; // abort on the first failing chunk
+          case DecodeResult::DECODE_FRAME:
+            gotFrame = true;
+            break;
+          default:
+            break;
+        }
+      }
+    }
+    return gotFrame ? DecodeResult::DECODE_FRAME : DecodeResult::DECODE_NO_FRAME;
+  }
+  return DoDecodeFrame(aSample, inputData, inputSize); // other codecs, or an empty sample: pass the buffer through unparsed
+}
+
+FFmpegH264Decoder<LIBAV_VER>::DecodeResult
+FFmpegH264Decoder<LIBAV_VER>::DoDecodeFrame(MediaRawData* aSample,
+                                            uint8_t* aData, int aSize)
 {
  AVPacket packet;
  av_init_packet(&packet);

-  packet.data = const_cast<uint8_t*>(aSample->Data());
-  packet.size = aSample->Size();
+  packet.data = aData;
+  packet.size = aSize;
  packet.dts = aSample->mTimecode;
  packet.pts = aSample->mTime;
  packet.flags = aSample->mKeyframe ? AV_PKT_FLAG_KEY : 0;
@@ -365,6 +417,10 @@ FFmpegH264Decoder<LIBAV_VER>::Flush()
 FFmpegH264Decoder<LIBAV_VER>::~FFmpegH264Decoder()
 {
  MOZ_COUNT_DTOR(FFmpegH264Decoder);
+  if (mCodecParser) { // parser is only allocated on demand, so it may still be null here
+    av_parser_close(mCodecParser);
+    mCodecParser = nullptr;
+  }
 }

 AVCodecID
@@ -47,6 +47,7 @@ public:
 private:
  void DecodeFrame(MediaRawData* aSample);
  DecodeResult DoDecodeFrame(MediaRawData* aSample);
+  DecodeResult DoDecodeFrame(MediaRawData* aSample, uint8_t* aData, int aSize);
  void DoDrain();
  void OutputDelayedFrames();

@@ -68,6 +69,7 @@ private:
  uint32_t mPictureHeight;
  uint32_t mDisplayWidth;
  uint32_t mDisplayHeight;
+  AVCodecParserContext* mCodecParser;

  class PtsCorrectionContext {
  public: