1 files changed, 1661 insertions, 0 deletions
diff --git a/src/decoder_impl.cc b/src/decoder_impl.cc
new file mode 100644
index 0000000..751671d
--- /dev/null
+++ b/src/decoder_impl.cc
@@ -0,0 +1,1661 @@
+// Copyright 2019 The libgav1 Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "src/decoder_impl.h"
+
+#include <algorithm>
+#include <atomic>
+#include <cassert>
+#include <iterator>
+#include <new>
+#include <utility>
+
+#include "src/dsp/common.h"
+#include "src/dsp/constants.h"
+#include "src/dsp/dsp.h"
+#include "src/film_grain.h"
+#include "src/frame_buffer_utils.h"
+#include "src/frame_scratch_buffer.h"
+#include "src/loop_restoration_info.h"
+#include "src/obu_parser.h"
+#include "src/post_filter.h"
+#include "src/prediction_mask.h"
+#include "src/threading_strategy.h"
+#include "src/utils/blocking_counter.h"
+#include "src/utils/common.h"
+#include "src/utils/constants.h"
+#include "src/utils/logging.h"
+#include "src/utils/parameter_tree.h"
+#include "src/utils/raw_bit_reader.h"
+#include "src/utils/segmentation.h"
+#include "src/utils/threadpool.h"
+#include "src/yuv_buffer.h"
+
+namespace libgav1 {
+namespace {
+
+constexpr int kMaxBlockWidth4x4 = 32;
+constexpr int kMaxBlockHeight4x4 = 32;
+
+// Computes the bottom border size in pixels. If CDEF, loop restoration or
+// SuperRes is enabled, adds extra border pixels to facilitate those steps to
+// happen nearly in-place (a few extra rows instead of an entire frame buffer).
+// The logic in this function should match the corresponding logic for
+// |vertical_shift| in the PostFilter constructor.
+int GetBottomBorderPixels(const bool do_cdef, const bool do_restoration,
+                          const bool do_superres, const int subsampling_y) {
+  int extra_border = 0;
+  if (do_cdef) {
+    extra_border += kCdefBorder;
+  } else if (do_restoration) {
+    // If CDEF is enabled, loop restoration is safe without extra border.
+    extra_border += kRestorationVerticalBorder;
+  }
+  if (do_superres) extra_border += kSuperResVerticalBorder;
+  // Double the number of extra bottom border pixels if the bottom border will
+  // be subsampled.
+  extra_border <<= subsampling_y;
+  return Align(kBorderPixels + extra_border, 2);  // Must be a multiple of 2.
+}
+
+// Sets |frame_scratch_buffer->tile_decoding_failed| to true (while holding on
+// to |frame_scratch_buffer->superblock_row_mutex|) and notifies the first
+// |count| condition variables in
+// |frame_scratch_buffer->superblock_row_progress_condvar|.
+void SetFailureAndNotifyAll(FrameScratchBuffer* const frame_scratch_buffer,
+                            int count) {
+  {
+    std::lock_guard<std::mutex> lock(
+        frame_scratch_buffer->superblock_row_mutex);
+    frame_scratch_buffer->tile_decoding_failed = true;
+  }
+  std::condition_variable* const condvars =
+      frame_scratch_buffer->superblock_row_progress_condvar.get();
+  for (int i = 0; i < count; ++i) {
+    condvars[i].notify_one();
+  }
+}
+
+// Helper class that releases the frame scratch buffer in the destructor.
+class FrameScratchBufferReleaser {
+ public:
+  FrameScratchBufferReleaser(
+      FrameScratchBufferPool* frame_scratch_buffer_pool,
+      std::unique_ptr<FrameScratchBuffer>* frame_scratch_buffer)
+      : frame_scratch_buffer_pool_(frame_scratch_buffer_pool),
+        frame_scratch_buffer_(frame_scratch_buffer) {}
+  ~FrameScratchBufferReleaser() {
+    frame_scratch_buffer_pool_->Release(std::move(*frame_scratch_buffer_));
+  }
+
+ private:
+  FrameScratchBufferPool* const frame_scratch_buffer_pool_;
+  std::unique_ptr<FrameScratchBuffer>* const frame_scratch_buffer_;
+};
+
+// Sets the |frame|'s segmentation map for two cases. The third case is handled
+// in Tile::DecodeBlock().
+void SetSegmentationMap(const ObuFrameHeader& frame_header,
+                        const SegmentationMap* prev_segment_ids,
+                        RefCountedBuffer* const frame) {
+  if (!frame_header.segmentation.enabled) {
+    // All segment_id's are 0.
+    frame->segmentation_map()->Clear();
+  } else if (!frame_header.segmentation.update_map) {
+    // Copy from prev_segment_ids.
+    if (prev_segment_ids == nullptr) {
+      // Treat a null prev_segment_ids pointer as if it pointed to a
+      // segmentation map containing all 0s.
+      frame->segmentation_map()->Clear();
+    } else {
+      frame->segmentation_map()->CopyFrom(*prev_segment_ids);
+    }
+  }
+}
+
+StatusCode DecodeTilesNonFrameParallel(
+    const ObuSequenceHeader& sequence_header,
+    const ObuFrameHeader& frame_header,
+    const Vector<std::unique_ptr<Tile>>& tiles,
+    FrameScratchBuffer* const frame_scratch_buffer,
+    PostFilter* const post_filter) {
+  // Decode in superblock row order.
+  const int block_width4x4 = sequence_header.use_128x128_superblock ? 32 : 16;
+  std::unique_ptr<TileScratchBuffer> tile_scratch_buffer =
+      frame_scratch_buffer->tile_scratch_buffer_pool.Get();
+  if (tile_scratch_buffer == nullptr) return kLibgav1StatusOutOfMemory;
+  for (int row4x4 = 0; row4x4 < frame_header.rows4x4;
+       row4x4 += block_width4x4) {
+    for (const auto& tile_ptr : tiles) {
+      if (!tile_ptr->ProcessSuperBlockRow<kProcessingModeParseAndDecode, true>(
+              row4x4, tile_scratch_buffer.get())) {
+        return kLibgav1StatusUnknownError;
+      }
+    }
+    post_filter->ApplyFilteringForOneSuperBlockRow(
+        row4x4, block_width4x4, row4x4 + block_width4x4 >= frame_header.rows4x4,
+        /*do_deblock=*/true);
+  }
+  frame_scratch_buffer->tile_scratch_buffer_pool.Release(
+      std::move(tile_scratch_buffer));
+  return kStatusOk;
+}
+
+StatusCode DecodeTilesThreadedNonFrameParallel(
+    const Vector<std::unique_ptr<Tile>>& tiles,
+    FrameScratchBuffer* const frame_scratch_buffer,
+    PostFilter* const post_filter,
+    BlockingCounterWithStatus* const pending_tiles) {
+  ThreadingStrategy& threading_strategy =
+      frame_scratch_buffer->threading_strategy;
+  const int num_workers = threading_strategy.tile_thread_count();
+  BlockingCounterWithStatus pending_workers(num_workers);
+  std::atomic<int> tile_counter(0);
+  const int tile_count = static_cast<int>(tiles.size());
+  bool tile_decoding_failed = false;
+  // Submit tile decoding jobs to the thread pool.
+  for (int i = 0; i < num_workers; ++i) {
+    threading_strategy.tile_thread_pool()->Schedule([&tiles, tile_count,
+                                                     &tile_counter,
+                                                     &pending_workers,
+                                                     &pending_tiles]() {
+      bool failed = false;
+      int index;
+      while ((index = tile_counter.fetch_add(1, std::memory_order_relaxed)) <
+             tile_count) {
+        if (!failed) {
+          const auto& tile_ptr = tiles[index];
+          if (!tile_ptr->ParseAndDecode()) {
+            LIBGAV1_DLOG(ERROR, "Error decoding tile #%d", tile_ptr->number());
+            failed = true;
+          }
+        } else {
+          pending_tiles->Decrement(false);
+        }
+      }
+      pending_workers.Decrement(!failed);
+    });
+  }
+  // Have the current thread partake in tile decoding.
+  int index;
+  while ((index = tile_counter.fetch_add(1, std::memory_order_relaxed)) <
+         tile_count) {
+    if (!tile_decoding_failed) {
+      const auto& tile_ptr = tiles[index];
+      if (!tile_ptr->ParseAndDecode()) {
+        LIBGAV1_DLOG(ERROR, "Error decoding tile #%d", tile_ptr->number());
+        tile_decoding_failed = true;
+      }
+    } else {
+      pending_tiles->Decrement(false);
+    }
+  }
+  // Wait until all the workers are done. This ensures that all the tiles have
+  // been parsed.
+  tile_decoding_failed |= !pending_workers.Wait();
+  // Wait until all the tiles have been decoded.
+  tile_decoding_failed |= !pending_tiles->Wait();
+  if (tile_decoding_failed) return kStatusUnknownError;
+  assert(threading_strategy.post_filter_thread_pool() != nullptr);
+  post_filter->ApplyFilteringThreaded();
+  return kStatusOk;
+}
+
+StatusCode DecodeTilesFrameParallel(
+    const ObuSequenceHeader& sequence_header,
+    const ObuFrameHeader& frame_header,
+    const Vector<std::unique_ptr<Tile>>& tiles,
+    const SymbolDecoderContext& saved_symbol_decoder_context,
+    const SegmentationMap* const prev_segment_ids,
+    FrameScratchBuffer* const frame_scratch_buffer,
+    PostFilter* const post_filter, RefCountedBuffer* const current_frame) {
+  // Parse the frame.
+  for (const auto& tile : tiles) {
+    if (!tile->Parse()) {
+      LIBGAV1_DLOG(ERROR, "Failed to parse tile number: %d\n", tile->number());
+      return kStatusUnknownError;
+    }
+  }
+  if (frame_header.enable_frame_end_update_cdf) {
+    frame_scratch_buffer->symbol_decoder_context = saved_symbol_decoder_context;
+  }
+  current_frame->SetFrameContext(frame_scratch_buffer->symbol_decoder_context);
+  SetSegmentationMap(frame_header, prev_segment_ids, current_frame);
+  // Mark frame as parsed.
+  current_frame->SetFrameState(kFrameStateParsed);
+  std::unique_ptr<TileScratchBuffer> tile_scratch_buffer =
+      frame_scratch_buffer->tile_scratch_buffer_pool.Get();
+  if (tile_scratch_buffer == nullptr) {
+    return kStatusOutOfMemory;
+  }
+  const int block_width4x4 = sequence_header.use_128x128_superblock ? 32 : 16;
+  // Decode in superblock row order (inter prediction in the Tile class will
+  // block until the required superblocks in the reference frame are decoded).
+  for (int row4x4 = 0; row4x4 < frame_header.rows4x4;
+       row4x4 += block_width4x4) {
+    for (const auto& tile_ptr : tiles) {
+      if (!tile_ptr->ProcessSuperBlockRow<kProcessingModeDecodeOnly, false>(
+              row4x4, tile_scratch_buffer.get())) {
+        LIBGAV1_DLOG(ERROR, "Failed to decode tile number: %d\n",
+                     tile_ptr->number());
+        return kStatusUnknownError;
+      }
+    }
+    const int progress_row = post_filter->ApplyFilteringForOneSuperBlockRow(
+        row4x4, block_width4x4, row4x4 + block_width4x4 >= frame_header.rows4x4,
+        /*do_deblock=*/true);
+    if (progress_row >= 0) {
+      current_frame->SetProgress(progress_row);
+    }
+  }
+  // Mark frame as decoded (we no longer care about row-level progress since the
+  // entire frame has been decoded).
+  current_frame->SetFrameState(kFrameStateDecoded);
+  frame_scratch_buffer->tile_scratch_buffer_pool.Release(
+      std::move(tile_scratch_buffer));
+  return kStatusOk;
+}
+
+// Helper function used by DecodeTilesThreadedFrameParallel. Applies the
+// deblocking filter for tile boundaries for the superblock row at |row4x4|.
+void ApplyDeblockingFilterForTileBoundaries(
+    PostFilter* const post_filter, const std::unique_ptr<Tile>* tile_row_base,
+    const ObuFrameHeader& frame_header, int row4x4, int block_width4x4,
+    int tile_columns, bool decode_entire_tiles_in_worker_threads) {
+  // Apply vertical deblock filtering for the first 64 columns of each tile.
+  for (int tile_column = 0; tile_column < tile_columns; ++tile_column) {
+    const Tile& tile = *tile_row_base[tile_column];
+    post_filter->ApplyDeblockFilter(
+        kLoopFilterTypeVertical, row4x4, tile.column4x4_start(),
+        tile.column4x4_start() + kNum4x4InLoopFilterUnit, block_width4x4);
+  }
+  if (decode_entire_tiles_in_worker_threads &&
+      row4x4 == tile_row_base[0]->row4x4_start()) {
+    // This is the first superblock row of a tile row. In this case, apply
+    // horizontal deblock filtering for the entire superblock row.
+    post_filter->ApplyDeblockFilter(kLoopFilterTypeHorizontal, row4x4, 0,
+                                    frame_header.columns4x4, block_width4x4);
+  } else {
+    // Apply horizontal deblock filtering for the first 64 columns of the
+    // first tile.
+    const Tile& first_tile = *tile_row_base[0];
+    post_filter->ApplyDeblockFilter(
+        kLoopFilterTypeHorizontal, row4x4, first_tile.column4x4_start(),
+        first_tile.column4x4_start() + kNum4x4InLoopFilterUnit, block_width4x4);
+    // Apply horizontal deblock filtering for the last 64 columns of the
+    // previous tile and the first 64 columns of the current tile.
+    for (int tile_column = 1; tile_column < tile_columns; ++tile_column) {
+      const Tile& tile = *tile_row_base[tile_column];
+      // If the previous tile has more than 64 columns, then include those
+      // for the horizontal deblock.
+      const Tile& previous_tile = *tile_row_base[tile_column - 1];
+      const int column4x4_start =
+          tile.column4x4_start() -
+          ((tile.column4x4_start() - kNum4x4InLoopFilterUnit !=
+            previous_tile.column4x4_start())
+               ? kNum4x4InLoopFilterUnit
+               : 0);
+      post_filter->ApplyDeblockFilter(
+          kLoopFilterTypeHorizontal, row4x4, column4x4_start,
+          tile.column4x4_start() + kNum4x4InLoopFilterUnit, block_width4x4);
+    }
+    // Apply horizontal deblock filtering for the last 64 columns of the
+    // last tile.
+    const Tile& last_tile = *tile_row_base[tile_columns - 1];
+    // Identify the last column4x4 value and do horizontal filtering for
+    // that column4x4. The value of last column4x4 is the nearest multiple
+    // of 16 that is before tile.column4x4_end().
+    const int column4x4_start = (last_tile.column4x4_end() - 1) & ~15;
+    // If column4x4_start is the same as tile.column4x4_start() then it
+    // means that the last tile has <= 64 columns. So there is nothing left
+    // to deblock (since it was already deblocked in the loop above).
+    if (column4x4_start != last_tile.column4x4_start()) {
+      post_filter->ApplyDeblockFilter(
+          kLoopFilterTypeHorizontal, row4x4, column4x4_start,
+          last_tile.column4x4_end(), block_width4x4);
+    }
+  }
+}
+
+// Helper function used by DecodeTilesThreadedFrameParallel. Decodes the
+// superblock row starting at |row4x4| for tile at index |tile_index| in the
+// list of tiles |tiles|. If the decoding is successful, then it does the
+// following:
+//   * Schedule the next superblock row in the current tile column for decoding
+//     (the next superblock row may be in a different tile than the current
+//     one).
+//   * If an entire superblock row of the frame has been decoded, it notifies
+//     the waiters (if there are any).
+void DecodeSuperBlockRowInTile(
+    const Vector<std::unique_ptr<Tile>>& tiles, size_t tile_index, int row4x4,
+    const int superblock_size4x4, const int tile_columns,
+    const int superblock_rows, FrameScratchBuffer* const frame_scratch_buffer,
+    PostFilter* const post_filter, BlockingCounter* const pending_jobs) {
+  std::unique_ptr<TileScratchBuffer> scratch_buffer =
+      frame_scratch_buffer->tile_scratch_buffer_pool.Get();
+  if (scratch_buffer == nullptr) {
+    SetFailureAndNotifyAll(frame_scratch_buffer, superblock_rows);
+    return;
+  }
+  Tile& tile = *tiles[tile_index];
+  const bool ok = tile.ProcessSuperBlockRow<kProcessingModeDecodeOnly, false>(
+      row4x4, scratch_buffer.get());
+  frame_scratch_buffer->tile_scratch_buffer_pool.Release(
+      std::move(scratch_buffer));
+  if (!ok) {
+    SetFailureAndNotifyAll(frame_scratch_buffer, superblock_rows);
+    return;
+  }
+  if (post_filter->DoDeblock()) {
+    // Apply vertical deblock filtering for all the columns in this tile except
+    // for the first 64 columns.
+    post_filter->ApplyDeblockFilter(
+        kLoopFilterTypeVertical, row4x4,
+        tile.column4x4_start() + kNum4x4InLoopFilterUnit, tile.column4x4_end(),
+        superblock_size4x4);
+    // Apply horizontal deblock filtering for all the columns in this tile
+    // except for the first and the last 64 columns.
+    // Note about the last tile of each row: For the last tile, column4x4_end
+    // may not be a multiple of 16. In that case it is still okay to simply
+    // subtract 16 since ApplyDeblockFilter() will only do the filters in
+    // increments of 64 columns (or 32 columns for chroma with subsampling).
+    post_filter->ApplyDeblockFilter(
+        kLoopFilterTypeHorizontal, row4x4,
+        tile.column4x4_start() + kNum4x4InLoopFilterUnit,
+        tile.column4x4_end() - kNum4x4InLoopFilterUnit, superblock_size4x4);
+  }
+  const int superblock_size4x4_log2 = FloorLog2(superblock_size4x4);
+  const int index = row4x4 >> superblock_size4x4_log2;
+  int* const superblock_row_progress =
+      frame_scratch_buffer->superblock_row_progress.get();
+  std::condition_variable* const superblock_row_progress_condvar =
+      frame_scratch_buffer->superblock_row_progress_condvar.get();
+  bool notify;
+  {
+    std::lock_guard<std::mutex> lock(
+        frame_scratch_buffer->superblock_row_mutex);
+    notify = ++superblock_row_progress[index] == tile_columns;
+  }
+  if (notify) {
+    // We are done decoding this superblock row. Notify the post filtering
+    // thread.
+    superblock_row_progress_condvar[index].notify_one();
+  }
+  // Schedule the next superblock row (if one exists).
+  ThreadPool& thread_pool =
+      *frame_scratch_buffer->threading_strategy.thread_pool();
+  const int next_row4x4 = row4x4 + superblock_size4x4;
+  if (!tile.IsRow4x4Inside(next_row4x4)) {
+    tile_index += tile_columns;
+  }
+  if (tile_index >= tiles.size()) return;
+  pending_jobs->IncrementBy(1);
+  thread_pool.Schedule([&tiles, tile_index, next_row4x4, superblock_size4x4,
+                        tile_columns, superblock_rows, frame_scratch_buffer,
+                        post_filter, pending_jobs]() {
+    DecodeSuperBlockRowInTile(tiles, tile_index, next_row4x4,
+                              superblock_size4x4, tile_columns, superblock_rows,
+                              frame_scratch_buffer, post_filter, pending_jobs);
+    pending_jobs->Decrement();
+  });
+}
+
+StatusCode DecodeTilesThreadedFrameParallel(
+    const ObuSequenceHeader& sequence_header,
+    const ObuFrameHeader& frame_header,
+    const Vector<std::unique_ptr<Tile>>& tiles,
+    const SymbolDecoderContext& saved_symbol_decoder_context,
+    const SegmentationMap* const prev_segment_ids,
+    FrameScratchBuffer* const frame_scratch_buffer,
+    PostFilter* const post_filter, RefCountedBuffer* const current_frame) {
+  // Parse the frame.
+  ThreadPool& thread_pool =
+      *frame_scratch_buffer->threading_strategy.thread_pool();
+  std::atomic<int> tile_counter(0);
+  const int tile_count = static_cast<int>(tiles.size());
+  const int num_workers = thread_pool.num_threads();
+  BlockingCounterWithStatus parse_workers(num_workers);
+  // Submit tile parsing jobs to the thread pool.
+  for (int i = 0; i < num_workers; ++i) {
+    thread_pool.Schedule([&tiles, tile_count, &tile_counter, &parse_workers]() {
+      bool failed = false;
+      int index;
+      while ((index = tile_counter.fetch_add(1, std::memory_order_relaxed)) <
+             tile_count) {
+        if (!failed) {
+          const auto& tile_ptr = tiles[index];
+          if (!tile_ptr->Parse()) {
+            LIBGAV1_DLOG(ERROR, "Error parsing tile #%d", tile_ptr->number());
+            failed = true;
+          }
+        }
+      }
+      parse_workers.Decrement(!failed);
+    });
+  }
+
+  // Have the current thread participate in parsing.
+  bool failed = false;
+  int index;
+  while ((index = tile_counter.fetch_add(1, std::memory_order_relaxed)) <
+         tile_count) {
+    if (!failed) {
+      const auto& tile_ptr = tiles[index];
+      if (!tile_ptr->Parse()) {
+        LIBGAV1_DLOG(ERROR, "Error parsing tile #%d", tile_ptr->number());
+        failed = true;
+      }
+    }
+  }
+
+  // Wait until all the parse workers are done. This ensures that all the tiles
+  // have been parsed.
+  if (!parse_workers.Wait() || failed) {
+    return kLibgav1StatusUnknownError;
+  }
+  if (frame_header.enable_frame_end_update_cdf) {
+    frame_scratch_buffer->symbol_decoder_context = saved_symbol_decoder_context;
+  }
+  current_frame->SetFrameContext(frame_scratch_buffer->symbol_decoder_context);
+  SetSegmentationMap(frame_header, prev_segment_ids, current_frame);
+  current_frame->SetFrameState(kFrameStateParsed);
+
+  // Decode the frame.
+  const int block_width4x4 = sequence_header.use_128x128_superblock ? 32 : 16;
+  const int block_width4x4_log2 =
+      sequence_header.use_128x128_superblock ? 5 : 4;
+  const int superblock_rows =
+      (frame_header.rows4x4 + block_width4x4 - 1) >> block_width4x4_log2;
+  if (!frame_scratch_buffer->superblock_row_progress.Resize(superblock_rows) ||
+      !frame_scratch_buffer->superblock_row_progress_condvar.Resize(
+          superblock_rows)) {
+    return kLibgav1StatusOutOfMemory;
+  }
+  int* const superblock_row_progress =
+      frame_scratch_buffer->superblock_row_progress.get();
+  memset(superblock_row_progress, 0,
+         superblock_rows * sizeof(superblock_row_progress[0]));
+  frame_scratch_buffer->tile_decoding_failed = false;
+  const int tile_columns = frame_header.tile_info.tile_columns;
+  const bool decode_entire_tiles_in_worker_threads =
+      num_workers >= tile_columns;
+  BlockingCounter pending_jobs(
+      decode_entire_tiles_in_worker_threads ? num_workers : tile_columns);
+  if (decode_entire_tiles_in_worker_threads) {
+    // Submit tile decoding jobs to the thread pool.
+    tile_counter = 0;
+    for (int i = 0; i < num_workers; ++i) {
+      thread_pool.Schedule([&tiles, tile_count, &tile_counter, &pending_jobs,
+                            frame_scratch_buffer, superblock_rows]() {
+        bool failed = false;
+        int index;
+        while ((index = tile_counter.fetch_add(1, std::memory_order_relaxed)) <
+               tile_count) {
+          if (failed) continue;
+          const auto& tile_ptr = tiles[index];
+          if (!tile_ptr->Decode(
+                  &frame_scratch_buffer->superblock_row_mutex,
+                  frame_scratch_buffer->superblock_row_progress.get(),
+                  frame_scratch_buffer->superblock_row_progress_condvar
+                      .get())) {
+            LIBGAV1_DLOG(ERROR, "Error decoding tile #%d", tile_ptr->number());
+            failed = true;
+            SetFailureAndNotifyAll(frame_scratch_buffer, superblock_rows);
+          }
+        }
+        pending_jobs.Decrement();
+      });
+    }
+  } else {
+    // Schedule the jobs for first tile row.
+    for (int tile_index = 0; tile_index < tile_columns; ++tile_index) {
+      thread_pool.Schedule([&tiles, tile_index, block_width4x4, tile_columns,
+                            superblock_rows, frame_scratch_buffer, post_filter,
+                            &pending_jobs]() {
+        DecodeSuperBlockRowInTile(
+            tiles, tile_index, 0, block_width4x4, tile_columns, superblock_rows,
+            frame_scratch_buffer, post_filter, &pending_jobs);
+        pending_jobs.Decrement();
+      });
+    }
+  }
+
+  // Current thread will do the post filters.
+  std::condition_variable* const superblock_row_progress_condvar =
+      frame_scratch_buffer->superblock_row_progress_condvar.get();
+  const std::unique_ptr<Tile>* tile_row_base = &tiles[0];
+  for (int row4x4 = 0, index = 0; row4x4 < frame_header.rows4x4;
+       row4x4 += block_width4x4, ++index) {
+    if (!tile_row_base[0]->IsRow4x4Inside(row4x4)) {
+      tile_row_base += tile_columns;
+    }
+    {
+      std::unique_lock<std::mutex> lock(
+          frame_scratch_buffer->superblock_row_mutex);
+      while (superblock_row_progress[index] != tile_columns &&
+             !frame_scratch_buffer->tile_decoding_failed) {
+        superblock_row_progress_condvar[index].wait(lock);
+      }
+      if (frame_scratch_buffer->tile_decoding_failed) break;
+    }
+    if (post_filter->DoDeblock()) {
+      // Apply deblocking filter for the tile boundaries of this superblock row.
+      // The deblocking filter for the internal blocks will be applied in the
+      // tile worker threads. In this thread, we will only have to apply
+      // deblocking filter for the tile boundaries.
+      ApplyDeblockingFilterForTileBoundaries(
+          post_filter, tile_row_base, frame_header, row4x4, block_width4x4,
+          tile_columns, decode_entire_tiles_in_worker_threads);
+    }
+    // Apply all the post filters other than deblocking.
+    const int progress_row = post_filter->ApplyFilteringForOneSuperBlockRow(
+        row4x4, block_width4x4, row4x4 + block_width4x4 >= frame_header.rows4x4,
+        /*do_deblock=*/false);
+    if (progress_row >= 0) {
+      current_frame->SetProgress(progress_row);
+    }
+  }
+  // Wait until all the pending jobs are done. This ensures that all the tiles
+  // have been decoded and wrapped up.
+  pending_jobs.Wait();
+  {
+    std::lock_guard<std::mutex> lock(
+        frame_scratch_buffer->superblock_row_mutex);
+    if (frame_scratch_buffer->tile_decoding_failed) {
+      return kLibgav1StatusUnknownError;
+    }
+  }
+
+  current_frame->SetFrameState(kFrameStateDecoded);
+  return kStatusOk;
+}
+
+}  // namespace
+
+// static
+StatusCode DecoderImpl::Create(const DecoderSettings* settings,
+                               std::unique_ptr<DecoderImpl>* output) {
+  if (settings->threads <= 0) {
+    LIBGAV1_DLOG(ERROR, "Invalid settings->threads: %d.", settings->threads);
+    return kStatusInvalidArgument;
+  }
+  if (settings->frame_parallel) {
+    if (settings->release_input_buffer == nullptr) {
+      LIBGAV1_DLOG(ERROR,
+                   "release_input_buffer callback must not be null when "
+                   "frame_parallel is true.");
+      return kStatusInvalidArgument;
+    }
+  }
+  std::unique_ptr<DecoderImpl> impl(new (std::nothrow) DecoderImpl(settings));
+  if (impl == nullptr) {
+    LIBGAV1_DLOG(ERROR, "Failed to allocate DecoderImpl.");
+    return kStatusOutOfMemory;
+  }
+  const StatusCode status = impl->Init();
+  if (status != kStatusOk) return status;
+  *output = std::move(impl);
+  return kStatusOk;
+}
+
+DecoderImpl::DecoderImpl(const DecoderSettings* settings)
+    : buffer_pool_(settings->on_frame_buffer_size_changed,
+                   settings->get_frame_buffer, settings->release_frame_buffer,
+                   settings->callback_private_data),
+      settings_(*settings) {
+  dsp::DspInit();
+}
+
+DecoderImpl::~DecoderImpl() {
+  // Clean up and wait until all the threads have stopped. We just have to pass
+  // in a dummy status that is not kStatusOk or kStatusTryAgain to trigger the
+  // path that clears all the threads and structs.
+  SignalFailure(kStatusUnknownError);
+  // Release any other frame buffer references that we may be holding on to.
+  ReleaseOutputFrame();
+  output_frame_queue_.Clear();
+  for (auto& reference_frame : state_.reference_frame) {
+    reference_frame = nullptr;
+  }
+}
+
+StatusCode DecoderImpl::Init() {
+  if (!GenerateWedgeMask(&wedge_masks_)) {
+    LIBGAV1_DLOG(ERROR, "GenerateWedgeMask() failed.");
+    return kStatusOutOfMemory;
+  }
+  if (!output_frame_queue_.Init(kMaxLayers)) {
+    LIBGAV1_DLOG(ERROR, "output_frame_queue_.Init() failed.");
+    return kStatusOutOfMemory;
+  }
+  return kStatusOk;
+}
+
+StatusCode DecoderImpl::InitializeFrameThreadPoolAndTemporalUnitQueue(
+    const uint8_t* data, size_t size) {
+  is_frame_parallel_ = false;
+  if (settings_.frame_parallel) {
+    DecoderState state;
+    std::unique_ptr<ObuParser> obu(new (std::nothrow) ObuParser(
+        data, size, settings_.operating_point, &buffer_pool_, &state));
+    if (obu == nullptr) {
+      LIBGAV1_DLOG(ERROR, "Failed to allocate OBU parser.");
+      return kStatusOutOfMemory;
+    }
+    RefCountedBufferPtr current_frame;
+    const StatusCode status = obu->ParseOneFrame(&current_frame);
+    if (status != kStatusOk) {
+      LIBGAV1_DLOG(ERROR, "Failed to parse OBU.");
+      return status;
+    }
+    current_frame = nullptr;
+    // We assume that the first frame that was parsed will contain the frame
+    // header. This assumption is usually true in practice. So we will simply
+    // not use frame parallel mode if this is not the case.
+    if (settings_.threads > 1 &&
+        !InitializeThreadPoolsForFrameParallel(
+            settings_.threads, obu->frame_header().tile_info.tile_count,
+            obu->frame_header().tile_info.tile_columns, &frame_thread_pool_,
+            &frame_scratch_buffer_pool_)) {
+      return kStatusOutOfMemory;
+    }
+  }
+  const int max_allowed_frames =
+      (frame_thread_pool_ != nullptr) ? frame_thread_pool_->num_threads() : 1;
+  assert(max_allowed_frames > 0);
+  if (!temporal_units_.Init(max_allowed_frames)) {
+    LIBGAV1_DLOG(ERROR, "temporal_units_.Init() failed.");
+    return kStatusOutOfMemory;
+  }
+  is_frame_parallel_ = frame_thread_pool_ != nullptr;
+  return kStatusOk;
+}
+
+StatusCode DecoderImpl::EnqueueFrame(const uint8_t* data, size_t size,
+                                     int64_t user_private_data,
+                                     void* buffer_private_data) {
+  if (data == nullptr || size == 0) return kStatusInvalidArgument;
+  if (HasFailure()) return kStatusUnknownError;
+  if (!seen_first_frame_) {
+    seen_first_frame_ = true;
+    const StatusCode status =
+        InitializeFrameThreadPoolAndTemporalUnitQueue(data, size);
+    if (status != kStatusOk) {
+      return SignalFailure(status);
+    }
+  }
+  if (temporal_units_.Full()) {
+    return kStatusTryAgain;
+  }
+  if (is_frame_parallel_) {
+    return ParseAndSchedule(data, size, user_private_data, buffer_private_data);
+  }
+  TemporalUnit temporal_unit(data, size, user_private_data,
+                             buffer_private_data);
+  temporal_units_.Push(std::move(temporal_unit));
+  return kStatusOk;
+}
+
+StatusCode DecoderImpl::SignalFailure(StatusCode status) {
+  if (status == kStatusOk || status == kStatusTryAgain) return status;
+  // Set the |failure_status_| first so that any pending jobs in
+  // |frame_thread_pool_| will exit right away when the thread pool is being
+  // released below.
+  {
+    std::lock_guard<std::mutex> lock(mutex_);
+    failure_status_ = status;
+  }
+  // Make sure all waiting threads exit.
+  buffer_pool_.Abort();
+  frame_thread_pool_ = nullptr;
+  while (!temporal_units_.Empty()) {
+    if (settings_.release_input_buffer != nullptr) {
+      settings_.release_input_buffer(
+          settings_.callback_private_data,
+          temporal_units_.Front().buffer_private_data);
+    }
+    temporal_units_.Pop();
+  }
+  return status;
+}
+
+// DequeueFrame() follows the following policy to avoid holding unnecessary
+// frame buffer references in output_frame_: output_frame_ must be null when
+// DequeueFrame() returns false.
+StatusCode DecoderImpl::DequeueFrame(const DecoderBuffer** out_ptr) {
+  if (out_ptr == nullptr) {
+    LIBGAV1_DLOG(ERROR, "Invalid argument: out_ptr == nullptr.");
+    return kStatusInvalidArgument;
+  }
+  // We assume a call to DequeueFrame() indicates that the caller is no longer
+  // using the previous output frame, so we can release it.
+  ReleaseOutputFrame();
+  if (temporal_units_.Empty()) {
+    // No input frames to decode.
+    *out_ptr = nullptr;
+    return kStatusNothingToDequeue;
+  }
+  TemporalUnit& temporal_unit = temporal_units_.Front();
+  if (!is_frame_parallel_) {
+    // If |output_frame_queue_| is not empty, then return the first frame from
+    // that queue.
+    if (!output_frame_queue_.Empty()) {
+      RefCountedBufferPtr frame = std::move(output_frame_queue_.Front());
+      output_frame_queue_.Pop();
+      buffer_.user_private_data = temporal_unit.user_private_data;
+      if (output_frame_queue_.Empty()) {
+        temporal_units_.Pop();
+      }
+      const StatusCode status = CopyFrameToOutputBuffer(frame);
+      if (status != kStatusOk) {
+        return status;
+      }
+      *out_ptr = &buffer_;
+      return kStatusOk;
+    }
+    // Decode the next available temporal unit and return.
+    const StatusCode status = DecodeTemporalUnit(temporal_unit, out_ptr);
+    if (status != kStatusOk) {
+      // In case of failure, discard all the output frames that we may be
+      // holding on references to.
+      output_frame_queue_.Clear();
+    }
+    if (settings_.release_input_buffer != nullptr) {
+      settings_.release_input_buffer(settings_.callback_private_data,
+                                     temporal_unit.buffer_private_data);
+    }
+    if (output_frame_queue_.Empty()) {
+      temporal_units_.Pop();
+    }
+    return status;
+  }
+  {
+    std::unique_lock<std::mutex> lock(mutex_);
+    if (settings_.blocking_dequeue) {
+      while (!temporal_unit.decoded && failure_status_ == kStatusOk) {
+        decoded_condvar_.wait(lock);
+      }
+    } else {
+      if (!temporal_unit.decoded && failure_status_ == kStatusOk) {
+        return kStatusTryAgain;
+      }
+    }
+    if (failure_status_ != kStatusOk) {
+      const StatusCode failure_status = failure_status_;
+      lock.unlock();
+      return SignalFailure(failure_status);
+    }
+  }
+  if (settings_.release_input_buffer != nullptr &&
+      !temporal_unit.released_input_buffer) {
+    temporal_unit.released_input_buffer = true;
+    settings_.release_input_buffer(settings_.callback_private_data,
+                                   temporal_unit.buffer_private_data);
+  }
+  if (temporal_unit.status != kStatusOk) {
+    temporal_units_.Pop();
+    return SignalFailure(temporal_unit.status);
+  }
+  if (!temporal_unit.has_displayable_frame) {
+    *out_ptr = nullptr;
+    temporal_units_.Pop();
+    return kStatusOk;
+  }
+  assert(temporal_unit.output_layer_count > 0);
+  StatusCode status = CopyFrameToOutputBuffer(
+      temporal_unit.output_layers[temporal_unit.output_layer_count - 1].frame);
+  temporal_unit.output_layers[temporal_unit.output_layer_count - 1].frame =
+      nullptr;
+  if (status != kStatusOk) {
+    temporal_units_.Pop();
+    return SignalFailure(status);
+  }
+  buffer_.user_private_data = temporal_unit.user_private_data;
+  *out_ptr = &buffer_;
+  if (--temporal_unit.output_layer_count == 0) {
+    temporal_units_.Pop();
+  }
+  return kStatusOk;
+}
+
+StatusCode DecoderImpl::ParseAndSchedule(const uint8_t* data, size_t size,
+                                         int64_t user_private_data,
+                                         void* buffer_private_data) {
+  TemporalUnit temporal_unit(data, size, user_private_data,
+                             buffer_private_data);
+  std::unique_ptr<ObuParser> obu(new (std::nothrow) ObuParser(
+      temporal_unit.data, temporal_unit.size, settings_.operating_point,
+      &buffer_pool_, &state_));
+  if (obu == nullptr) {
+    LIBGAV1_DLOG(ERROR, "Failed to allocate OBU parser.");
+    return kStatusOutOfMemory;
+  }
+  if (has_sequence_header_) {
+    obu->set_sequence_header(sequence_header_);
+  }
+  StatusCode status;
+  int position_in_temporal_unit = 0;
+  while (obu->HasData()) {
+    RefCountedBufferPtr current_frame;
+    status = obu->ParseOneFrame(&current_frame);
+    if (status != kStatusOk) {
+      LIBGAV1_DLOG(ERROR, "Failed to parse OBU.");
+      return status;
+    }
+    if (!MaybeInitializeQuantizerMatrix(obu->frame_header())) {
+      LIBGAV1_DLOG(ERROR, "InitializeQuantizerMatrix() failed.");
+      return kStatusOutOfMemory;
+    }
+    if (IsNewSequenceHeader(*obu)) {
+      const ObuSequenceHeader& sequence_header = obu->sequence_header();
+      const Libgav1ImageFormat image_format =
+          ComposeImageFormat(sequence_header.color_config.is_monochrome,
+                             sequence_header.color_config.subsampling_x,
+                             sequence_header.color_config.subsampling_y);
+      const int max_bottom_border = GetBottomBorderPixels(
+          /*do_cdef=*/true, /*do_restoration=*/true,
+          /*do_superres=*/true, sequence_header.color_config.subsampling_y);
+      // TODO(vigneshv): This may not be the right place to call this callback
+      // for the frame parallel case. Investigate and fix it.
+      if (!buffer_pool_.OnFrameBufferSizeChanged(
+              sequence_header.color_config.bitdepth, image_format,
+              sequence_header.max_frame_width, sequence_header.max_frame_height,
+              kBorderPixels, kBorderPixels, kBorderPixels, max_bottom_border)) {
+        LIBGAV1_DLOG(ERROR, "buffer_pool_.OnFrameBufferSizeChanged failed.");
+        return kStatusUnknownError;
+      }
+    }
+    // This can happen when there are multiple spatial/temporal layers and if
+    // all the layers are outside the current operating point.
+    if (current_frame == nullptr) {
+      continue;
+    }
+    // Note that we cannot set EncodedFrame.temporal_unit here. It will be set
+    // in the code below after |temporal_unit| is std::move'd into the
+    // |temporal_units_| queue.
+    if (!temporal_unit.frames.emplace_back(obu.get(), state_, current_frame,
+                                           position_in_temporal_unit++)) {
+      LIBGAV1_DLOG(ERROR, "temporal_unit.frames.emplace_back failed.");
+      return kStatusOutOfMemory;
+    }
+    state_.UpdateReferenceFrames(current_frame,
+                                 obu->frame_header().refresh_frame_flags);
+  }
+  // This function cannot fail after this point. So it is okay to move the
+  // |temporal_unit| into |temporal_units_| queue.
+  temporal_units_.Push(std::move(temporal_unit));
+  if (temporal_units_.Back().frames.empty()) {
+    std::lock_guard<std::mutex> lock(mutex_);
+    temporal_units_.Back().has_displayable_frame = false;
+    temporal_units_.Back().decoded = true;
+    return kStatusOk;
+  }
+  for (auto& frame : temporal_units_.Back().frames) {
+    EncodedFrame* const encoded_frame = &frame;
+    encoded_frame->temporal_unit = &temporal_units_.Back();
+    frame_thread_pool_->Schedule([this, encoded_frame]() {
+      if (HasFailure()) return;
+      const StatusCode status = DecodeFrame(encoded_frame);
+      encoded_frame->state = {};
+      encoded_frame->frame = nullptr;
+      TemporalUnit& temporal_unit = *encoded_frame->temporal_unit;
+      std::lock_guard<std::mutex> lock(mutex_);
+      if (failure_status_ != kStatusOk) return;
+      // temporal_unit's status defaults to kStatusOk. So we need to set it only
+      // on error. If |failure_status_| is not kStatusOk at this point, it means
+      // that there has already been a failure. So we don't care about this
+      // subsequent failure.  We will simply return the error code of the first
+      // failure.
+      if (status != kStatusOk) {
+        temporal_unit.status = status;
+        if (failure_status_ == kStatusOk) {
+          failure_status_ = status;
+        }
+      }
+      temporal_unit.decoded =
+          ++temporal_unit.decoded_count == temporal_unit.frames.size();
+      if (temporal_unit.decoded && settings_.output_all_layers &&
+          temporal_unit.output_layer_count > 1) {
+        std::sort(
+            temporal_unit.output_layers,
+            temporal_unit.output_layers + temporal_unit.output_layer_count);
+      }
+      if (temporal_unit.decoded || failure_status_ != kStatusOk) {
+        decoded_condvar_.notify_one();
+      }
+    });
+  }
+  return kStatusOk;
+}
+
+StatusCode DecoderImpl::DecodeFrame(EncodedFrame* const encoded_frame) {
+  const ObuSequenceHeader& sequence_header = encoded_frame->sequence_header;
+  const ObuFrameHeader& frame_header = encoded_frame->frame_header;
+  RefCountedBufferPtr current_frame = std::move(encoded_frame->frame);
+
+  std::unique_ptr<FrameScratchBuffer> frame_scratch_buffer =
+      frame_scratch_buffer_pool_.Get();
+  if (frame_scratch_buffer == nullptr) {
+    LIBGAV1_DLOG(ERROR, "Error when getting FrameScratchBuffer.");
+    return kStatusOutOfMemory;
+  }
+  // |frame_scratch_buffer| will be released when this local variable goes out
+  // of scope (i.e.) on any return path in this function.
+  FrameScratchBufferReleaser frame_scratch_buffer_releaser(
+      &frame_scratch_buffer_pool_, &frame_scratch_buffer);
+
+  StatusCode status;
+  if (!frame_header.show_existing_frame) {
+    if (encoded_frame->tile_buffers.empty()) {
+      // This means that the last call to ParseOneFrame() did not actually
+      // have any tile groups. This could happen in rare cases (for example,
+      // if there is a Metadata OBU after the TileGroup OBU). We currently do
+      // not have a reason to handle those cases, so we simply continue.
+      return kStatusOk;
+    }
+    status = DecodeTiles(sequence_header, frame_header,
+                         encoded_frame->tile_buffers, encoded_frame->state,
+                         frame_scratch_buffer.get(), current_frame.get());
+    if (status != kStatusOk) {
+      return status;
+    }
+  } else {
+    if (!current_frame->WaitUntilDecoded()) {
+      return kStatusUnknownError;
+    }
+  }
+  if (!frame_header.show_frame && !frame_header.show_existing_frame) {
+    // This frame is not displayable. Not an error.
+    return kStatusOk;
+  }
+  RefCountedBufferPtr film_grain_frame;
+  status = ApplyFilmGrain(
+      sequence_header, frame_header, current_frame, &film_grain_frame,
+      frame_scratch_buffer->threading_strategy.thread_pool());
+  if (status != kStatusOk) {
+    return status;
+  }
+
+  TemporalUnit& temporal_unit = *encoded_frame->temporal_unit;
+  std::lock_guard<std::mutex> lock(mutex_);
+  if (temporal_unit.has_displayable_frame && !settings_.output_all_layers) {
+    assert(temporal_unit.output_frame_position >= 0);
+    // A displayable frame was already found in this temporal unit. This can
+    // happen if there are multiple spatial/temporal layers. Since
+    // |settings_.output_all_layers| is false, we will output only the last
+    // displayable frame.
+    if (temporal_unit.output_frame_position >
+        encoded_frame->position_in_temporal_unit) {
+      return kStatusOk;
+    }
+    // Replace any output frame that we may have seen before with the current
+    // frame.
+    assert(temporal_unit.output_layer_count == 1);
+    --temporal_unit.output_layer_count;
+  }
+  temporal_unit.has_displayable_frame = true;
+  temporal_unit.output_layers[temporal_unit.output_layer_count].frame =
+      std::move(film_grain_frame);
+  temporal_unit.output_layers[temporal_unit.output_layer_count]
+      .position_in_temporal_unit = encoded_frame->position_in_temporal_unit;
+  ++temporal_unit.output_layer_count;
+  temporal_unit.output_frame_position =
+      encoded_frame->position_in_temporal_unit;
+  return kStatusOk;
+}
+
+StatusCode DecoderImpl::DecodeTemporalUnit(const TemporalUnit& temporal_unit,
+                                           const DecoderBuffer** out_ptr) {
+  std::unique_ptr<ObuParser> obu(new (std::nothrow) ObuParser(
+      temporal_unit.data, temporal_unit.size, settings_.operating_point,
+      &buffer_pool_, &state_));
+  if (obu == nullptr) {
+    LIBGAV1_DLOG(ERROR, "Failed to allocate OBU parser.");
+    return kStatusOutOfMemory;
+  }
+  if (has_sequence_header_) {
+    obu->set_sequence_header(sequence_header_);
+  }
+  StatusCode status;
+  std::unique_ptr<FrameScratchBuffer> frame_scratch_buffer =
+      frame_scratch_buffer_pool_.Get();
+  if (frame_scratch_buffer == nullptr) {
+    LIBGAV1_DLOG(ERROR, "Error when getting FrameScratchBuffer.");
+    return kStatusOutOfMemory;
+  }
+  // |frame_scratch_buffer| will be released when this local variable goes out
+  // of scope (i.e.) on any return path in this function.
+  FrameScratchBufferReleaser frame_scratch_buffer_releaser(
+      &frame_scratch_buffer_pool_, &frame_scratch_buffer);
+
+  while (obu->HasData()) {
+    RefCountedBufferPtr current_frame;
+    status = obu->ParseOneFrame(&current_frame);
+    if (status != kStatusOk) {
+      LIBGAV1_DLOG(ERROR, "Failed to parse OBU.");
+      return status;
+    }
+    if (!MaybeInitializeQuantizerMatrix(obu->frame_header())) {
+      LIBGAV1_DLOG(ERROR, "InitializeQuantizerMatrix() failed.");
+      return kStatusOutOfMemory;
+    }
+    if (IsNewSequenceHeader(*obu)) {
+      const ObuSequenceHeader& sequence_header = obu->sequence_header();
+      const Libgav1ImageFormat image_format =
+          ComposeImageFormat(sequence_header.color_config.is_monochrome,
+                             sequence_header.color_config.subsampling_x,
+                             sequence_header.color_config.subsampling_y);
+      const int max_bottom_border = GetBottomBorderPixels(
+          /*do_cdef=*/true, /*do_restoration=*/true,
+          /*do_superres=*/true, sequence_header.color_config.subsampling_y);
+      if (!buffer_pool_.OnFrameBufferSizeChanged(
+              sequence_header.color_config.bitdepth, image_format,
+              sequence_header.max_frame_width, sequence_header.max_frame_height,
+              kBorderPixels, kBorderPixels, kBorderPixels, max_bottom_border)) {
+        LIBGAV1_DLOG(ERROR, "buffer_pool_.OnFrameBufferSizeChanged failed.");
+        return kStatusUnknownError;
+      }
+    }
+    if (!obu->frame_header().show_existing_frame) {
+      if (obu->tile_buffers().empty()) {
+        // This means that the last call to ParseOneFrame() did not actually
+        // have any tile groups. This could happen in rare cases (for example,
+        // if there is a Metadata OBU after the TileGroup OBU). We currently do
+        // not have a reason to handle those cases, so we simply continue.
+        continue;
+      }
+      status = DecodeTiles(obu->sequence_header(), obu->frame_header(),
+                           obu->tile_buffers(), state_,
+                           frame_scratch_buffer.get(), current_frame.get());
+      if (status != kStatusOk) {
+        return status;
+      }
+    }
+    state_.UpdateReferenceFrames(current_frame,
+                                 obu->frame_header().refresh_frame_flags);
+    if (obu->frame_header().show_frame ||
+        obu->frame_header().show_existing_frame) {
+      if (!output_frame_queue_.Empty() && !settings_.output_all_layers) {
+        // There is more than one displayable frame in the current operating
+        // point and |settings_.output_all_layers| is false. In this case, we
+        // simply return the last displayable frame as the output frame and
+        // ignore the rest.
+        assert(output_frame_queue_.Size() == 1);
+        output_frame_queue_.Pop();
+      }
+      RefCountedBufferPtr film_grain_frame;
+      status = ApplyFilmGrain(
+          obu->sequence_header(), obu->frame_header(), current_frame,
+          &film_grain_frame,
+          frame_scratch_buffer->threading_strategy.film_grain_thread_pool());
+      if (status != kStatusOk) return status;
+      output_frame_queue_.Push(std::move(film_grain_frame));
+    }
+  }
+  if (output_frame_queue_.Empty()) {
+    // No displayable frame in the temporal unit. Not an error.
+    *out_ptr = nullptr;
+    return kStatusOk;
+  }
+  status = CopyFrameToOutputBuffer(output_frame_queue_.Front());
+  output_frame_queue_.Pop();
+  if (status != kStatusOk) {
+    return status;
+  }
+  buffer_.user_private_data = temporal_unit.user_private_data;
+  *out_ptr = &buffer_;
+  return kStatusOk;
+}
+
+StatusCode DecoderImpl::CopyFrameToOutputBuffer(
+    const RefCountedBufferPtr& frame) {
+  YuvBuffer* yuv_buffer = frame->buffer();
+
+  buffer_.chroma_sample_position = frame->chroma_sample_position();
+
+  if (yuv_buffer->is_monochrome()) {
+    buffer_.image_format = kImageFormatMonochrome400;
+  } else {
+    if (yuv_buffer->subsampling_x() == 0 && yuv_buffer->subsampling_y() == 0) {
+      buffer_.image_format = kImageFormatYuv444;
+    } else if (yuv_buffer->subsampling_x() == 1 &&
+               yuv_buffer->subsampling_y() == 0) {
+      buffer_.image_format = kImageFormatYuv422;
+    } else if (yuv_buffer->subsampling_x() == 1 &&
+               yuv_buffer->subsampling_y() == 1) {
+      buffer_.image_format = kImageFormatYuv420;
+    } else {
+      LIBGAV1_DLOG(ERROR,
+                   "Invalid chroma subsampling values: cannot determine buffer "
+                   "image format.");
+      return kStatusInvalidArgument;
+    }
+  }
+  buffer_.color_range = sequence_header_.color_config.color_range;
+  buffer_.color_primary = sequence_header_.color_config.color_primary;
+  buffer_.transfer_characteristics =
+      sequence_header_.color_config.transfer_characteristics;
+  buffer_.matrix_coefficients =
+      sequence_header_.color_config.matrix_coefficients;
+
+  buffer_.bitdepth = yuv_buffer->bitdepth();
+  const int num_planes =
+      yuv_buffer->is_monochrome() ? kMaxPlanesMonochrome : kMaxPlanes;
+  int plane = kPlaneY;
+  for (; plane < num_planes; ++plane) {
+    buffer_.stride[plane] = yuv_buffer->stride(plane);
+    buffer_.plane[plane] = yuv_buffer->data(plane);
+    buffer_.displayed_width[plane] = yuv_buffer->width(plane);
+    buffer_.displayed_height[plane] = yuv_buffer->height(plane);
+  }
+  for (; plane < kMaxPlanes; ++plane) {
+    buffer_.stride[plane] = 0;
+    buffer_.plane[plane] = nullptr;
+    buffer_.displayed_width[plane] = 0;
+    buffer_.displayed_height[plane] = 0;
+  }
+  buffer_.spatial_id = frame->spatial_id();
+  buffer_.temporal_id = frame->temporal_id();
+  buffer_.buffer_private_data = frame->buffer_private_data();
+  output_frame_ = frame;
+  return kStatusOk;
+}
+
+void DecoderImpl::ReleaseOutputFrame() {
+  for (auto& plane : buffer_.plane) {
+    plane = nullptr;
+  }
+  output_frame_ = nullptr;
+}
+
+StatusCode DecoderImpl::DecodeTiles(
+    const ObuSequenceHeader& sequence_header,
+    const ObuFrameHeader& frame_header, const Vector<TileBuffer>& tile_buffers,
+    const DecoderState& state, FrameScratchBuffer* const frame_scratch_buffer,
+    RefCountedBuffer* const current_frame) {
+  frame_scratch_buffer->tile_scratch_buffer_pool.Reset(
+      sequence_header.color_config.bitdepth);
+  if (!frame_scratch_buffer->loop_restoration_info.Reset(
+          &frame_header.loop_restoration, frame_header.upscaled_width,
+          frame_header.height, sequence_header.color_config.subsampling_x,
+          sequence_header.color_config.subsampling_y,
+          sequence_header.color_config.is_monochrome)) {
+    LIBGAV1_DLOG(ERROR,
+                 "Failed to allocate memory for loop restoration info units.");
+    return kStatusOutOfMemory;
+  }
+  ThreadingStrategy& threading_strategy =
+      frame_scratch_buffer->threading_strategy;
+  if (!is_frame_parallel_ &&
+      !threading_strategy.Reset(frame_header, settings_.threads)) {
+    return kStatusOutOfMemory;
+  }
+  const bool do_cdef =
+      PostFilter::DoCdef(frame_header, settings_.post_filter_mask);
+  const int num_planes = sequence_header.color_config.is_monochrome
+                             ? kMaxPlanesMonochrome
+                             : kMaxPlanes;
+  const bool do_restoration = PostFilter::DoRestoration(
+      frame_header.loop_restoration, settings_.post_filter_mask, num_planes);
+  const bool do_superres =
+      PostFilter::DoSuperRes(frame_header, settings_.post_filter_mask);
+  // Use kBorderPixels for the left, right, and top borders. Only the bottom
+  // border may need to be bigger. Cdef border is needed only if we apply Cdef
+  // without multithreading.
+  const int bottom_border = GetBottomBorderPixels(
+      do_cdef && threading_strategy.post_filter_thread_pool() == nullptr,
+      do_restoration, do_superres, sequence_header.color_config.subsampling_y);
+  current_frame->set_chroma_sample_position(
+      sequence_header.color_config.chroma_sample_position);
+  if (!current_frame->Realloc(sequence_header.color_config.bitdepth,
+                              sequence_header.color_config.is_monochrome,
+                              frame_header.upscaled_width, frame_header.height,
+                              sequence_header.color_config.subsampling_x,
+                              sequence_header.color_config.subsampling_y,
+                              /*left_border=*/kBorderPixels,
+                              /*right_border=*/kBorderPixels,
+                              /*top_border=*/kBorderPixels, bottom_border)) {
+    LIBGAV1_DLOG(ERROR, "Failed to allocate memory for the decoder buffer.");
+    return kStatusOutOfMemory;
+  }
+  if (sequence_header.enable_cdef) {
+    if (!frame_scratch_buffer->cdef_index.Reset(
+            DivideBy16(frame_header.rows4x4 + kMaxBlockHeight4x4),
+            DivideBy16(frame_header.columns4x4 + kMaxBlockWidth4x4),
+            /*zero_initialize=*/false)) {
+      LIBGAV1_DLOG(ERROR, "Failed to allocate memory for cdef index.");
+      return kStatusOutOfMemory;
+    }
+  }
+  if (!frame_scratch_buffer->inter_transform_sizes.Reset(
+          frame_header.rows4x4 + kMaxBlockHeight4x4,
+          frame_header.columns4x4 + kMaxBlockWidth4x4,
+          /*zero_initialize=*/false)) {
+    LIBGAV1_DLOG(ERROR, "Failed to allocate memory for inter_transform_sizes.");
+    return kStatusOutOfMemory;
+  }
+  if (frame_header.use_ref_frame_mvs) {
+    if (!frame_scratch_buffer->motion_field.mv.Reset(
+            DivideBy2(frame_header.rows4x4), DivideBy2(frame_header.columns4x4),
+            /*zero_initialize=*/false) ||
+        !frame_scratch_buffer->motion_field.reference_offset.Reset(
+            DivideBy2(frame_header.rows4x4), DivideBy2(frame_header.columns4x4),
+            /*zero_initialize=*/false)) {
+      LIBGAV1_DLOG(ERROR,
+                   "Failed to allocate memory for temporal motion vectors.");
+      return kStatusOutOfMemory;
+    }
+
+    // For each motion vector, only mv[0] needs to be initialized to
+    // kInvalidMvValue, mv[1] is not necessary to be initialized and can be
+    // set to an arbitrary value. For simplicity, mv[1] is set to 0.
+    // The following memory initialization of contiguous memory is very fast. It
+    // is not recommended to make the initialization multi-threaded, unless the
+    // memory which needs to be initialized in each thread is still contiguous.
+    MotionVector invalid_mv;
+    invalid_mv.mv[0] = kInvalidMvValue;
+    invalid_mv.mv[1] = 0;
+    MotionVector* const motion_field_mv =
+        &frame_scratch_buffer->motion_field.mv[0][0];
+    std::fill(motion_field_mv,
+              motion_field_mv + frame_scratch_buffer->motion_field.mv.size(),
+              invalid_mv);
+  }
+
+  // The addition of kMaxBlockHeight4x4 and kMaxBlockWidth4x4 is necessary so
+  // that the block parameters cache can be filled in for the last row/column
+  // without having to check for boundary conditions.
+  if (!frame_scratch_buffer->block_parameters_holder.Reset(
+          frame_header.rows4x4 + kMaxBlockHeight4x4,
+          frame_header.columns4x4 + kMaxBlockWidth4x4,
+          sequence_header.use_128x128_superblock)) {
+    return kStatusOutOfMemory;
+  }
+  const dsp::Dsp* const dsp =
+      dsp::GetDspTable(sequence_header.color_config.bitdepth);
+  if (dsp == nullptr) {
+    LIBGAV1_DLOG(ERROR, "Failed to get the dsp table for bitdepth %d.",
+                 sequence_header.color_config.bitdepth);
+    return kStatusInternalError;
+  }
+
+  const int tile_count = frame_header.tile_info.tile_count;
+  assert(tile_count >= 1);
+  Vector<std::unique_ptr<Tile>> tiles;
+  if (!tiles.reserve(tile_count)) {
+    LIBGAV1_DLOG(ERROR, "tiles.reserve(%d) failed.\n", tile_count);
+    return kStatusOutOfMemory;
+  }
+
+  if (threading_strategy.row_thread_pool(0) != nullptr || is_frame_parallel_) {
+    if (frame_scratch_buffer->residual_buffer_pool == nullptr) {
+      frame_scratch_buffer->residual_buffer_pool.reset(
+          new (std::nothrow) ResidualBufferPool(
+              sequence_header.use_128x128_superblock,
+              sequence_header.color_config.subsampling_x,
+              sequence_header.color_config.subsampling_y,
+              sequence_header.color_config.bitdepth == 8 ? sizeof(int16_t)
+                                                         : sizeof(int32_t)));
+      if (frame_scratch_buffer->residual_buffer_pool == nullptr) {
+        LIBGAV1_DLOG(ERROR, "Failed to allocate residual buffer.\n");
+        return kStatusOutOfMemory;
+      }
+    } else {
+      frame_scratch_buffer->residual_buffer_pool->Reset(
+          sequence_header.use_128x128_superblock,
+          sequence_header.color_config.subsampling_x,
+          sequence_header.color_config.subsampling_y,
+          sequence_header.color_config.bitdepth == 8 ? sizeof(int16_t)
+                                                     : sizeof(int32_t));
+    }
+  }
+
+  if (threading_strategy.post_filter_thread_pool() != nullptr && do_cdef) {
+    // We need to store 4 rows per 64x64 unit.
+    const int num_units =
+        MultiplyBy4(RightShiftWithCeiling(frame_header.rows4x4, 4));
+    // subsampling_y is set to zero irrespective of the actual frame's
+    // subsampling since we need to store exactly |num_units| rows of the loop
+    // restoration border pixels.
+    if (!frame_scratch_buffer->cdef_border.Realloc(
+            sequence_header.color_config.bitdepth,
+            sequence_header.color_config.is_monochrome,
+            MultiplyBy4(frame_header.columns4x4), num_units,
+            sequence_header.color_config.subsampling_x,
+            /*subsampling_y=*/0, kBorderPixels, kBorderPixels, kBorderPixels,
+            kBorderPixels, nullptr, nullptr, nullptr)) {
+      return kStatusOutOfMemory;
+    }
+  }
+
+  if (do_restoration &&
+      (do_cdef || threading_strategy.post_filter_thread_pool() != nullptr)) {
+    // We need to store 4 rows per 64x64 unit.
+    const int num_units =
+        MultiplyBy4(RightShiftWithCeiling(frame_header.rows4x4, 4));
+    // subsampling_y is set to zero irrespective of the actual frame's
+    // subsampling since we need to store exactly |num_units| rows of the loop
+    // restoration border pixels.
+    if (!frame_scratch_buffer->loop_restoration_border.Realloc(
+            sequence_header.color_config.bitdepth,
+            sequence_header.color_config.is_monochrome,
+            frame_header.upscaled_width, num_units,
+            sequence_header.color_config.subsampling_x,
+            /*subsampling_y=*/0, kBorderPixels, kBorderPixels, kBorderPixels,
+            kBorderPixels, nullptr, nullptr, nullptr)) {
+      return kStatusOutOfMemory;
+    }
+  }
+
+  if (do_superres) {
+    const int pixel_size = sequence_header.color_config.bitdepth == 8
+                               ? sizeof(uint8_t)
+                               : sizeof(uint16_t);
+    if (!frame_scratch_buffer->superres_coefficients[kPlaneTypeY].Resize(
+            kSuperResFilterTaps * Align(frame_header.upscaled_width, 16) *
+            pixel_size)) {
+      LIBGAV1_DLOG(ERROR,
+                   "Failed to Resize superres_coefficients[kPlaneTypeY].");
+      return kStatusOutOfMemory;
+    }
+    if (!sequence_header.color_config.is_monochrome &&
+        sequence_header.color_config.subsampling_x != 0 &&
+        !frame_scratch_buffer->superres_coefficients[kPlaneTypeUV].Resize(
+            kSuperResFilterTaps *
+            Align(SubsampledValue(frame_header.upscaled_width, 1), 16) *
+            pixel_size)) {
+      LIBGAV1_DLOG(ERROR,
+                   "Failed to Resize superres_coefficients[kPlaneTypeUV].");
+      return kStatusOutOfMemory;
+    }
+  }
+
+  if (do_superres && threading_strategy.post_filter_thread_pool() != nullptr) {
+    const int num_threads =
+        threading_strategy.post_filter_thread_pool()->num_threads() + 1;
+    // subsampling_y is set to zero irrespective of the actual frame's
+    // subsampling since we need to store exactly |num_threads| rows of the
+    // down-scaled pixels.
+    // Left and right borders are for line extension. They are doubled for the Y
+    // plane to make sure the U and V planes have enough space after possible
+    // subsampling.
+    if (!frame_scratch_buffer->superres_line_buffer.Realloc(
+            sequence_header.color_config.bitdepth,
+            sequence_header.color_config.is_monochrome,
+            MultiplyBy4(frame_header.columns4x4), num_threads,
+            sequence_header.color_config.subsampling_x,
+            /*subsampling_y=*/0, 2 * kSuperResHorizontalBorder,
+            2 * (kSuperResHorizontalBorder + kSuperResHorizontalPadding), 0, 0,
+            nullptr, nullptr, nullptr)) {
+      LIBGAV1_DLOG(ERROR, "Failed to resize superres line buffer.\n");
+      return kStatusOutOfMemory;
+    }
+  }
+
+  PostFilter post_filter(frame_header, sequence_header, frame_scratch_buffer,
+                         current_frame->buffer(), dsp,
+                         settings_.post_filter_mask);
+
+  if (is_frame_parallel_ && !IsIntraFrame(frame_header.frame_type)) {
+    // We can parse the current frame if all the reference frames have been
+    // parsed.
+    for (const int index : frame_header.reference_frame_index) {
+      if (!state.reference_frame[index]->WaitUntilParsed()) {
+        return kStatusUnknownError;
+      }
+    }
+  }
+
+  // If prev_segment_ids is a null pointer, it is treated as if it pointed to
+  // a segmentation map containing all 0s.
+  const SegmentationMap* prev_segment_ids = nullptr;
+  if (frame_header.primary_reference_frame == kPrimaryReferenceNone) {
+    frame_scratch_buffer->symbol_decoder_context.Initialize(
+        frame_header.quantizer.base_index);
+  } else {
+    const int index =
+        frame_header
+            .reference_frame_index[frame_header.primary_reference_frame];
+    assert(index != -1);
+    const RefCountedBuffer* prev_frame = state.reference_frame[index].get();
+    frame_scratch_buffer->symbol_decoder_context = prev_frame->FrameContext();
+    if (frame_header.segmentation.enabled &&
+        prev_frame->columns4x4() == frame_header.columns4x4 &&
+        prev_frame->rows4x4() == frame_header.rows4x4) {
+      prev_segment_ids = prev_frame->segmentation_map();
+    }
+  }
+
+  // The Tile class must make use of a separate buffer to store the unfiltered
+  // pixels for the intra prediction of the next superblock row. This is done
+  // only when one of the following conditions are true:
+  //   * is_frame_parallel_ is true.
+  //   * settings_.threads == 1.
+  // In the non-frame-parallel multi-threaded case, we do not run the post
+  // filters in the decode loop. So this buffer need not be used.
+  const bool use_intra_prediction_buffer =
+      is_frame_parallel_ || settings_.threads == 1;
+  if (use_intra_prediction_buffer) {
+    if (!frame_scratch_buffer->intra_prediction_buffers.Resize(
+            frame_header.tile_info.tile_rows)) {
+      LIBGAV1_DLOG(ERROR, "Failed to Resize intra_prediction_buffers.");
+      return kStatusOutOfMemory;
+    }
+    IntraPredictionBuffer* const intra_prediction_buffers =
+        frame_scratch_buffer->intra_prediction_buffers.get();
+    for (int plane = kPlaneY; plane < num_planes; ++plane) {
+      const int subsampling =
+          (plane == kPlaneY) ? 0 : sequence_header.color_config.subsampling_x;
+      const size_t intra_prediction_buffer_size =
+          ((MultiplyBy4(frame_header.columns4x4) >> subsampling) *
+           (sequence_header.color_config.bitdepth == 8 ? sizeof(uint8_t)
+                                                       : sizeof(uint16_t)));
+      for (int tile_row = 0; tile_row < frame_header.tile_info.tile_rows;
+           ++tile_row) {
+        if (!intra_prediction_buffers[tile_row][plane].Resize(
+                intra_prediction_buffer_size)) {
+          LIBGAV1_DLOG(ERROR,
+                       "Failed to allocate intra prediction buffer for tile "
+                       "row %d plane %d.\n",
+                       tile_row, plane);
+          return kStatusOutOfMemory;
+        }
+      }
+    }
+  }
+
+  SymbolDecoderContext saved_symbol_decoder_context;
+  BlockingCounterWithStatus pending_tiles(tile_count);
+  for (int tile_number = 0; tile_number < tile_count; ++tile_number) {
+    std::unique_ptr<Tile> tile = Tile::Create(
+        tile_number, tile_buffers[tile_number].data,
+        tile_buffers[tile_number].size, sequence_header, frame_header,
+        current_frame, state, frame_scratch_buffer, wedge_masks_,
+        quantizer_matrix_, &saved_symbol_decoder_context, prev_segment_ids,
+        &post_filter, dsp, threading_strategy.row_thread_pool(tile_number),
+        &pending_tiles, is_frame_parallel_, use_intra_prediction_buffer);
+    if (tile == nullptr) {
+      LIBGAV1_DLOG(ERROR, "Failed to create tile.");
+      return kStatusOutOfMemory;
+    }
+    tiles.push_back_unchecked(std::move(tile));
+  }
+  assert(tiles.size() == static_cast<size_t>(tile_count));
+  if (is_frame_parallel_) {
+    if (frame_scratch_buffer->threading_strategy.thread_pool() == nullptr) {
+      return DecodeTilesFrameParallel(
+          sequence_header, frame_header, tiles, saved_symbol_decoder_context,
+          prev_segment_ids, frame_scratch_buffer, &post_filter, current_frame);
+    }
+    return DecodeTilesThreadedFrameParallel(
+        sequence_header, frame_header, tiles, saved_symbol_decoder_context,
+        prev_segment_ids, frame_scratch_buffer, &post_filter, current_frame);
+  }
+  StatusCode status;
+  if (settings_.threads == 1) {
+    status = DecodeTilesNonFrameParallel(sequence_header, frame_header, tiles,
+                                         frame_scratch_buffer, &post_filter);
+  } else {
+    status = DecodeTilesThreadedNonFrameParallel(tiles, frame_scratch_buffer,
+                                                 &post_filter, &pending_tiles);
+  }
+  if (status != kStatusOk) return status;
+  if (frame_header.enable_frame_end_update_cdf) {
+    frame_scratch_buffer->symbol_decoder_context = saved_symbol_decoder_context;
+  }
+  current_frame->SetFrameContext(frame_scratch_buffer->symbol_decoder_context);
+  SetSegmentationMap(frame_header, prev_segment_ids, current_frame);
+  return kStatusOk;
+}
+
+StatusCode DecoderImpl::ApplyFilmGrain(
+    const ObuSequenceHeader& sequence_header,
+    const ObuFrameHeader& frame_header,
+    const RefCountedBufferPtr& displayable_frame,
+    RefCountedBufferPtr* film_grain_frame, ThreadPool* thread_pool) {
+  if (!sequence_header.film_grain_params_present ||
+      !displayable_frame->film_grain_params().apply_grain ||
+      (settings_.post_filter_mask & 0x10) == 0) {
+    *film_grain_frame = displayable_frame;
+    return kStatusOk;
+  }
+  if (!frame_header.show_existing_frame &&
+      frame_header.refresh_frame_flags == 0) {
+    // If show_existing_frame is true, then the current frame is a previously
+    // saved reference frame. If refresh_frame_flags is nonzero, then the
+    // state_.UpdateReferenceFrames() call above has saved the current frame as
+    // a reference frame. Therefore, if both of these conditions are false, then
+    // the current frame is not saved as a reference frame. displayable_frame
+    // should hold the only reference to the current frame.
+    assert(displayable_frame.use_count() == 1);
+    // Add film grain noise in place.
+    *film_grain_frame = displayable_frame;
+  } else {
+    *film_grain_frame = buffer_pool_.GetFreeBuffer();
+    if (*film_grain_frame == nullptr) {
+      LIBGAV1_DLOG(ERROR,
+                   "Could not get film_grain_frame from the buffer pool.");
+      return kStatusResourceExhausted;
+    }
+    if (!(*film_grain_frame)
+             ->Realloc(displayable_frame->buffer()->bitdepth(),
+                       displayable_frame->buffer()->is_monochrome(),
+                       displayable_frame->upscaled_width(),
+                       displayable_frame->frame_height(),
+                       displayable_frame->buffer()->subsampling_x(),
+                       displayable_frame->buffer()->subsampling_y(),
+                       kBorderPixelsFilmGrain, kBorderPixelsFilmGrain,
+                       kBorderPixelsFilmGrain, kBorderPixelsFilmGrain)) {
+      LIBGAV1_DLOG(ERROR, "film_grain_frame->Realloc() failed.");
+      return kStatusOutOfMemory;
+    }
+    (*film_grain_frame)
+        ->set_chroma_sample_position(
+            displayable_frame->chroma_sample_position());
+    (*film_grain_frame)->set_spatial_id(displayable_frame->spatial_id());
+    (*film_grain_frame)->set_temporal_id(displayable_frame->temporal_id());
+  }
+  const bool color_matrix_is_identity =
+      sequence_header.color_config.matrix_coefficients ==
+      kMatrixCoefficientsIdentity;
+  assert(displayable_frame->buffer()->stride(kPlaneU) ==
+         displayable_frame->buffer()->stride(kPlaneV));
+  const int input_stride_uv = displayable_frame->buffer()->stride(kPlaneU);
+  assert((*film_grain_frame)->buffer()->stride(kPlaneU) ==
+         (*film_grain_frame)->buffer()->stride(kPlaneV));
+  const int output_stride_uv = (*film_grain_frame)->buffer()->stride(kPlaneU);
+#if LIBGAV1_MAX_BITDEPTH >= 10
+  if (displayable_frame->buffer()->bitdepth() > 8) {
+    FilmGrain<10> film_grain(displayable_frame->film_grain_params(),
+                             displayable_frame->buffer()->is_monochrome(),
+                             color_matrix_is_identity,
+                             displayable_frame->buffer()->subsampling_x(),
+                             displayable_frame->buffer()->subsampling_y(),
+                             displayable_frame->upscaled_width(),
+                             displayable_frame->frame_height(), thread_pool);
+    if (!film_grain.AddNoise(
+            displayable_frame->buffer()->data(kPlaneY),
+            displayable_frame->buffer()->stride(kPlaneY),
+            displayable_frame->buffer()->data(kPlaneU),
+            displayable_frame->buffer()->data(kPlaneV), input_stride_uv,
+            (*film_grain_frame)->buffer()->data(kPlaneY),
+            (*film_grain_frame)->buffer()->stride(kPlaneY),
+            (*film_grain_frame)->buffer()->data(kPlaneU),
+            (*film_grain_frame)->buffer()->data(kPlaneV), output_stride_uv)) {
+      LIBGAV1_DLOG(ERROR, "film_grain.AddNoise() failed.");
+      return kStatusOutOfMemory;
+    }
+    return kStatusOk;
+  }
+#endif  // LIBGAV1_MAX_BITDEPTH >= 10
+  FilmGrain<8> film_grain(displayable_frame->film_grain_params(),
+                          displayable_frame->buffer()->is_monochrome(),
+                          color_matrix_is_identity,
+                          displayable_frame->buffer()->subsampling_x(),
+                          displayable_frame->buffer()->subsampling_y(),
+                          displayable_frame->upscaled_width(),
+                          displayable_frame->frame_height(), thread_pool);
+  if (!film_grain.AddNoise(
+          displayable_frame->buffer()->data(kPlaneY),
+          displayable_frame->buffer()->stride(kPlaneY),
+          displayable_frame->buffer()->data(kPlaneU),
+          displayable_frame->buffer()->data(kPlaneV), input_stride_uv,
+          (*film_grain_frame)->buffer()->data(kPlaneY),
+          (*film_grain_frame)->buffer()->stride(kPlaneY),
+          (*film_grain_frame)->buffer()->data(kPlaneU),
+          (*film_grain_frame)->buffer()->data(kPlaneV), output_stride_uv)) {
+    LIBGAV1_DLOG(ERROR, "film_grain.AddNoise() failed.");
+    return kStatusOutOfMemory;
+  }
+  return kStatusOk;
+}
+
+bool DecoderImpl::IsNewSequenceHeader(const ObuParser& obu) {
+  if (std::find_if(obu.obu_headers().begin(), obu.obu_headers().end(),
+                   [](const ObuHeader& obu_header) {
+                     return obu_header.type == kObuSequenceHeader;
+                   }) == obu.obu_headers().end()) {
+    return false;
+  }
+  const ObuSequenceHeader sequence_header = obu.sequence_header();
+  const bool sequence_header_changed =
+      !has_sequence_header_ ||
+      sequence_header_.color_config.bitdepth !=
+          sequence_header.color_config.bitdepth ||
+      sequence_header_.color_config.is_monochrome !=
+          sequence_header.color_config.is_monochrome ||
+      sequence_header_.color_config.subsampling_x !=
+          sequence_header.color_config.subsampling_x ||
+      sequence_header_.color_config.subsampling_y !=
+          sequence_header.color_config.subsampling_y ||
+      sequence_header_.max_frame_width != sequence_header.max_frame_width ||
+      sequence_header_.max_frame_height != sequence_header.max_frame_height;
+  sequence_header_ = sequence_header;
+  has_sequence_header_ = true;
+  return sequence_header_changed;
+}
+
+bool DecoderImpl::MaybeInitializeQuantizerMatrix(
+    const ObuFrameHeader& frame_header) {
+  if (quantizer_matrix_initialized_ || !frame_header.quantizer.use_matrix) {
+    return true;
+  }
+  if (!InitializeQuantizerMatrix(&quantizer_matrix_)) {
+    return false;
+  }
+  quantizer_matrix_initialized_ = true;
+  return true;
+}
+
+}  // namespace libgav1